diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 01:02:30 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 01:02:30 +0000 |
commit | 76cb841cb886eef6b3bee341a2266c76578724ad (patch) | |
tree | f5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /drivers/net/ethernet/mellanox/mlx5 | |
parent | Initial commit. (diff) | |
download | linux-76cb841cb886eef6b3bee341a2266c76578724ad.tar.xz linux-76cb841cb886eef6b3bee341a2266c76578724ad.zip |
Adding upstream version 4.19.249.upstream/4.19.249
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5')
114 files changed, 57917 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig new file mode 100644 index 000000000..b7e3b8902 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -0,0 +1,120 @@ +# +# Mellanox driver configuration +# + +config MLX5_CORE + tristate "Mellanox 5th generation network adapters (ConnectX series) core driver" + depends on MAY_USE_DEVLINK + depends on PCI + imply PTP_1588_CLOCK + imply VXLAN + imply MLXFW + default n + ---help--- + Core driver for low level functionality of the ConnectX-4 and + Connect-IB cards by Mellanox Technologies. + +config MLX5_ACCEL + bool + +config MLX5_FPGA + bool "Mellanox Technologies Innova support" + depends on MLX5_CORE + select MLX5_ACCEL + ---help--- + Build support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board. If you select this option, the + mlx5_core driver will include the Innova FPGA core and allow building + sandbox-specific client drivers. + +config MLX5_CORE_EN + bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support" + depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE + depends on IPV6=y || IPV6=n || MLX5_CORE=m + select PAGE_POOL + default n + ---help--- + Ethernet support in Mellanox Technologies ConnectX-4 NIC. + +config MLX5_EN_ARFS + bool "Mellanox MLX5 ethernet accelerated receive flow steering (ARFS) support" + depends on MLX5_CORE_EN && RFS_ACCEL + default y + ---help--- + Mellanox MLX5 ethernet hardware-accelerated receive flow steering support, + Enables ethernet netdevice arfs support and ntuple filtering. + +config MLX5_EN_RXNFC + bool "Mellanox MLX5 ethernet rx nfc flow steering support" + depends on MLX5_CORE_EN + default y + ---help--- + Mellanox MLX5 ethernet rx nfc flow steering support + Enables ethtool receive network flow classification, which allows user defined + flow rules to direct traffic into arbitrary rx queue via ethtool set/get_rxnfc + API. + +config MLX5_MPFS + bool "Mellanox Technologies MLX5 MPFS support" + depends on MLX5_CORE_EN + default y + ---help--- + Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS) + support in ConnectX NIC. MPFs is required for when multi-PF configuration + is enabled to allow passing user configured unicast MAC addresses to the + requesting PF. + +config MLX5_ESWITCH + bool "Mellanox Technologies MLX5 SRIOV E-Switch support" + depends on MLX5_CORE_EN && NET_SWITCHDEV + default y + ---help--- + Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC. + E-Switch provides internal SRIOV packet steering and switching for the + enabled VFs and PF in two available modes: + Legacy SRIOV mode (L2 mac vlan steering based). + Switchdev mode (eswitch offloads). + +config MLX5_CORE_EN_DCB + bool "Data Center Bridging (DCB) Support" + default y + depends on MLX5_CORE_EN && DCB + ---help--- + Say Y here if you want to use Data Center Bridging (DCB) in the + driver. + If set to N, will not be able to configure QoS and ratelimit attributes. + This flag is depended on the kernel's DCB support. + + If unsure, set to Y + +config MLX5_CORE_IPOIB + bool "Mellanox 5th generation network adapters (connectX series) IPoIB offloads support" + depends on MLX5_CORE_EN + default n + ---help--- + MLX5 IPoIB offloads & acceleration support. + +config MLX5_EN_IPSEC + bool "IPSec XFRM cryptography-offload accelaration" + depends on MLX5_ACCEL + depends on MLX5_CORE_EN + depends on XFRM_OFFLOAD + depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD + default n + ---help--- + Build support for IPsec cryptography-offload accelaration in the NIC. + Note: Support for hardware with this capability needs to be selected + for this option to become available. + +config MLX5_EN_TLS + bool "TLS cryptography-offload accelaration" + depends on MLX5_CORE_EN + depends on TLS_DEVICE + depends on TLS=y || MLX5_CORE=m + depends on MLX5_ACCEL + default n + ---help--- + Build support for TLS cryptography-offload accelaration in the NIC. + Note: Support for hardware with this capability needs to be selected + for this option to become available. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile new file mode 100644 index 000000000..d324a3884 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Mellanox 5th generation network adapters +# (ConnectX series) core & netdev driver +# + +subdir-ccflags-y += -I$(src) + +obj-$(CONFIG_MLX5_CORE) += mlx5_core.o + +# +# mlx5 core basic +# +mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ + health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ + mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ + diag/fs_tracepoint.o diag/fw_tracer.o + +# +# Netdev basic +# +mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ + en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ + en_selftest.o en/port.o + +# +# Netdev extra +# +mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o +mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o +mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o + +# +# Core extra +# +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o +mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o +mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o +mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o + +# +# Ipoib netdev +# +mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o + +# +# Accelerations & FPGA +# +mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o + +mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ + fpga/ipsec.o fpga/tls.o + +mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ + en_accel/ipsec_stats.o + +mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o + +CFLAGS_tracepoint.o := -I$(src) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile new file mode 100644 index 000000000..d8e17110f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h new file mode 100644 index 000000000..c13260467 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h @@ -0,0 +1,37 @@ +#ifndef __MLX5E_ACCEL_H__ +#define __MLX5E_ACCEL_H__ + +#ifdef CONFIG_MLX5_ACCEL + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include "en.h" + +static inline bool is_metadata_hdr_valid(struct sk_buff *skb) +{ + __be16 *ethtype; + + if (unlikely(skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN)) + return false; + ethtype = (__be16 *)(skb->data + ETH_ALEN * 2); + if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE)) + return false; + return true; +} + +static inline void remove_metadata_hdr(struct sk_buff *skb) +{ + struct ethhdr *old_eth; + struct ethhdr *new_eth; + + /* Remove the metadata from the buffer */ + old_eth = (struct ethhdr *)skb->data; + new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN); + memmove(new_eth, old_eth, 2 * ETH_ALEN); + /* Ethertype is already in its new place */ + skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN); +} + +#endif /* CONFIG_MLX5_ACCEL */ + +#endif /* __MLX5E_EN_ACCEL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c new file mode 100644 index 000000000..9f1b19397 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/mlx5/device.h> + +#include "accel/ipsec.h" +#include "mlx5_core.h" +#include "fpga/ipsec.h" + +u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_ipsec_device_caps(mdev); +} +EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps); + +unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_ipsec_counters_count(mdev); +} + +int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, + unsigned int count) +{ + return mlx5_fpga_ipsec_counters_read(mdev, counters, count); +} + +void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *xfrm, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6) +{ + return mlx5_fpga_ipsec_create_sa_ctx(mdev, xfrm, saddr, daddr, + spi, is_ipv6); +} + +void mlx5_accel_esp_free_hw_context(void *context) +{ + mlx5_fpga_ipsec_delete_sa_ctx(context); +} + +int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_ipsec_init(mdev); +} + +void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) +{ + mlx5_fpga_ipsec_cleanup(mdev); +} + +struct mlx5_accel_esp_xfrm * +mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 flags) +{ + struct mlx5_accel_esp_xfrm *xfrm; + + xfrm = mlx5_fpga_esp_create_xfrm(mdev, attrs, flags); + if (IS_ERR(xfrm)) + return xfrm; + + xfrm->mdev = mdev; + return xfrm; +} +EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm); + +void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) +{ + mlx5_fpga_esp_destroy_xfrm(xfrm); +} +EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm); + +int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + return mlx5_fpga_esp_modify_xfrm(xfrm, attrs); +} +EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h new file mode 100644 index 000000000..024dbd22a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5_ACCEL_IPSEC_H__ +#define __MLX5_ACCEL_IPSEC_H__ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/accel.h> + +#ifdef CONFIG_MLX5_ACCEL + +#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \ + MLX5_ACCEL_IPSEC_CAP_DEVICE) + +unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev); +int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, + unsigned int count); + +void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *xfrm, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6); +void mlx5_accel_esp_free_hw_context(void *context); + +int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev); +void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); + +#else + +#define MLX5_IPSEC_DEV(mdev) false + +static inline void * +mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *xfrm, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6) +{ + return NULL; +} + +static inline void mlx5_accel_esp_free_hw_context(void *context) +{ +} + +static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) +{ +} + +#endif + +#endif /* __MLX5_ACCEL_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c new file mode 100644 index 000000000..da7bd2636 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/mlx5/device.h> + +#include "accel/tls.h" +#include "mlx5_core.h" +#include "fpga/tls.h" + +int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn, u32 *p_swid, + bool direction_sx) +{ + return mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, + start_offload_tcp_sn, p_swid, + direction_sx); +} + +void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, + bool direction_sx) +{ + mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx); +} + +int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, + u64 rcd_sn) +{ + return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn); +} + +bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_is_tls_device(mdev); +} + +u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_tls_device_caps(mdev); +} + +int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_tls_init(mdev); +} + +void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) +{ + mlx5_fpga_tls_cleanup(mdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h new file mode 100644 index 000000000..def4093eb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5_ACCEL_TLS_H__ +#define __MLX5_ACCEL_TLS_H__ + +#include <linux/mlx5/driver.h> +#include <linux/tls.h> + +#ifdef CONFIG_MLX5_ACCEL + +enum { + MLX5_ACCEL_TLS_TX = BIT(0), + MLX5_ACCEL_TLS_RX = BIT(1), + MLX5_ACCEL_TLS_V12 = BIT(2), + MLX5_ACCEL_TLS_V13 = BIT(3), + MLX5_ACCEL_TLS_LRO = BIT(4), + MLX5_ACCEL_TLS_IPV6 = BIT(5), + MLX5_ACCEL_TLS_AES_GCM128 = BIT(30), + MLX5_ACCEL_TLS_AES_GCM256 = BIT(31), +}; + +struct mlx5_ifc_tls_flow_bits { + u8 src_port[0x10]; + u8 dst_port[0x10]; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; + u8 ipv6[0x1]; + u8 direction_sx[0x1]; + u8 reserved_at_2[0x1e]; +}; + +int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn, u32 *p_swid, + bool direction_sx); +void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, + bool direction_sx); +int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, + u64 rcd_sn); +bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev); +u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev); +int mlx5_accel_tls_init(struct mlx5_core_dev *mdev); +void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev); + +#else + +static inline int +mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn, u32 *p_swid, + bool direction_sx) { return -ENOTSUPP; } +static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, + bool direction_sx) { } +static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, + u32 seq, u64 rcd_sn) { return 0; } +static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; } +static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; } +static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; } +static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { } + +#endif + +#endif /* __MLX5_ACCEL_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c new file mode 100644 index 000000000..456f30007 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -0,0 +1,311 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/export.h> +#include <linux/bitmap.h> +#include <linux/dma-mapping.h> +#include <linux/vmalloc.h> +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" + +struct mlx5_db_pgdir { + struct list_head list; + unsigned long *bitmap; + __be32 *db_page; + dma_addr_t db_dma; +}; + +/* Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. + */ + +static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, + size_t size, dma_addr_t *dma_handle, + int node) +{ + struct mlx5_priv *priv = &dev->priv; + int original_node; + void *cpu_handle; + + mutex_lock(&priv->alloc_mutex); + original_node = dev_to_node(&dev->pdev->dev); + set_dev_node(&dev->pdev->dev, node); + cpu_handle = dma_zalloc_coherent(&dev->pdev->dev, size, + dma_handle, GFP_KERNEL); + set_dev_node(&dev->pdev->dev, original_node); + mutex_unlock(&priv->alloc_mutex); + return cpu_handle; +} + +int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, + struct mlx5_frag_buf *buf, int node) +{ + dma_addr_t t; + + buf->size = size; + buf->npages = 1; + buf->page_shift = (u8)get_order(size) + PAGE_SHIFT; + + buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL); + if (!buf->frags) + return -ENOMEM; + + buf->frags->buf = mlx5_dma_zalloc_coherent_node(dev, size, + &t, node); + if (!buf->frags->buf) + goto err_out; + + buf->frags->map = t; + + while (t & ((1 << buf->page_shift) - 1)) { + --buf->page_shift; + buf->npages *= 2; + } + + return 0; +err_out: + kfree(buf->frags); + return -ENOMEM; +} + +int mlx5_buf_alloc(struct mlx5_core_dev *dev, + int size, struct mlx5_frag_buf *buf) +{ + return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node); +} +EXPORT_SYMBOL(mlx5_buf_alloc); + +void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) +{ + dma_free_coherent(&dev->pdev->dev, buf->size, buf->frags->buf, + buf->frags->map); + + kfree(buf->frags); +} +EXPORT_SYMBOL_GPL(mlx5_buf_free); + +int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, + struct mlx5_frag_buf *buf, int node) +{ + int i; + + buf->size = size; + buf->npages = DIV_ROUND_UP(size, PAGE_SIZE); + buf->page_shift = PAGE_SHIFT; + buf->frags = kcalloc(buf->npages, sizeof(struct mlx5_buf_list), + GFP_KERNEL); + if (!buf->frags) + goto err_out; + + for (i = 0; i < buf->npages; i++) { + struct mlx5_buf_list *frag = &buf->frags[i]; + int frag_sz = min_t(int, size, PAGE_SIZE); + + frag->buf = mlx5_dma_zalloc_coherent_node(dev, frag_sz, + &frag->map, node); + if (!frag->buf) + goto err_free_buf; + if (frag->map & ((1 << buf->page_shift) - 1)) { + dma_free_coherent(&dev->pdev->dev, frag_sz, + buf->frags[i].buf, buf->frags[i].map); + mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n", + &frag->map, buf->page_shift); + goto err_free_buf; + } + size -= frag_sz; + } + + return 0; + +err_free_buf: + while (i--) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, buf->frags[i].buf, + buf->frags[i].map); + kfree(buf->frags); +err_out: + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node); + +void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) +{ + int size = buf->size; + int i; + + for (i = 0; i < buf->npages; i++) { + int frag_sz = min_t(int, size, PAGE_SIZE); + + dma_free_coherent(&dev->pdev->dev, frag_sz, buf->frags[i].buf, + buf->frags[i].map); + size -= frag_sz; + } + kfree(buf->frags); +} +EXPORT_SYMBOL_GPL(mlx5_frag_buf_free); + +static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, + int node) +{ + u32 db_per_page = PAGE_SIZE / cache_line_size(); + struct mlx5_db_pgdir *pgdir; + + pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL); + if (!pgdir) + return NULL; + + pgdir->bitmap = kcalloc(BITS_TO_LONGS(db_per_page), + sizeof(unsigned long), + GFP_KERNEL); + + if (!pgdir->bitmap) { + kfree(pgdir); + return NULL; + } + + bitmap_fill(pgdir->bitmap, db_per_page); + + pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE, + &pgdir->db_dma, node); + if (!pgdir->db_page) { + kfree(pgdir->bitmap); + kfree(pgdir); + return NULL; + } + + return pgdir; +} + +static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir, + struct mlx5_db *db) +{ + u32 db_per_page = PAGE_SIZE / cache_line_size(); + int offset; + int i; + + i = find_first_bit(pgdir->bitmap, db_per_page); + if (i >= db_per_page) + return -ENOMEM; + + __clear_bit(i, pgdir->bitmap); + + db->u.pgdir = pgdir; + db->index = i; + offset = db->index * cache_line_size(); + db->db = pgdir->db_page + offset / sizeof(*pgdir->db_page); + db->dma = pgdir->db_dma + offset; + + db->db[0] = 0; + db->db[1] = 0; + + return 0; +} + +int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node) +{ + struct mlx5_db_pgdir *pgdir; + int ret = 0; + + mutex_lock(&dev->priv.pgdir_mutex); + + list_for_each_entry(pgdir, &dev->priv.pgdir_list, list) + if (!mlx5_alloc_db_from_pgdir(pgdir, db)) + goto out; + + pgdir = mlx5_alloc_db_pgdir(dev, node); + if (!pgdir) { + ret = -ENOMEM; + goto out; + } + + list_add(&pgdir->list, &dev->priv.pgdir_list); + + /* This should never fail -- we just allocated an empty page: */ + WARN_ON(mlx5_alloc_db_from_pgdir(pgdir, db)); + +out: + mutex_unlock(&dev->priv.pgdir_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx5_db_alloc_node); + +int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db) +{ + return mlx5_db_alloc_node(dev, db, dev->priv.numa_node); +} +EXPORT_SYMBOL_GPL(mlx5_db_alloc); + +void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) +{ + u32 db_per_page = PAGE_SIZE / cache_line_size(); + + mutex_lock(&dev->priv.pgdir_mutex); + + __set_bit(db->index, db->u.pgdir->bitmap); + + if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) { + dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + db->u.pgdir->db_page, db->u.pgdir->db_dma); + list_del(&db->u.pgdir->list); + kfree(db->u.pgdir->bitmap); + kfree(db->u.pgdir); + } + + mutex_unlock(&dev->priv.pgdir_mutex); +} +EXPORT_SYMBOL_GPL(mlx5_db_free); + +void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas) +{ + u64 addr; + int i; + + for (i = 0; i < buf->npages; i++) { + addr = buf->frags->map + (i << buf->page_shift); + + pas[i] = cpu_to_be64(addr); + } +} +EXPORT_SYMBOL_GPL(mlx5_fill_page_array); + +void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas) +{ + int i; + + for (i = 0; i < buf->npages; i++) + pas[i] = cpu_to_be64(buf->frags[i].map); +} +EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c new file mode 100644 index 000000000..a68608276 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -0,0 +1,1919 @@ +/* + * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/highmem.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/random.h> +#include <linux/io-mapping.h> +#include <linux/mlx5/driver.h> +#include <linux/debugfs.h> + +#include "mlx5_core.h" + +enum { + CMD_IF_REV = 5, +}; + +enum { + CMD_MODE_POLLING, + CMD_MODE_EVENTS +}; + +enum { + MLX5_CMD_DELIVERY_STAT_OK = 0x0, + MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR = 0x1, + MLX5_CMD_DELIVERY_STAT_TOK_ERR = 0x2, + MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR = 0x3, + MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR = 0x4, + MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR = 0x5, + MLX5_CMD_DELIVERY_STAT_FW_ERR = 0x6, + MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR = 0x7, + MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR = 0x8, + MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR = 0x9, + MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, +}; + +static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, + struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, + void *uout, int uout_size, + mlx5_cmd_cbk_t cbk, + void *context, int page_queue) +{ + gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL; + struct mlx5_cmd_work_ent *ent; + + ent = kzalloc(sizeof(*ent), alloc_flags); + if (!ent) + return ERR_PTR(-ENOMEM); + + ent->in = in; + ent->out = out; + ent->uout = uout; + ent->uout_size = uout_size; + ent->callback = cbk; + ent->context = context; + ent->cmd = cmd; + ent->page_queue = page_queue; + + return ent; +} + +static u8 alloc_token(struct mlx5_cmd *cmd) +{ + u8 token; + + spin_lock(&cmd->token_lock); + cmd->token++; + if (cmd->token == 0) + cmd->token++; + token = cmd->token; + spin_unlock(&cmd->token_lock); + + return token; +} + +static int alloc_ent(struct mlx5_cmd *cmd) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cmd->alloc_lock, flags); + ret = find_first_bit(&cmd->bitmask, cmd->max_reg_cmds); + if (ret < cmd->max_reg_cmds) + clear_bit(ret, &cmd->bitmask); + spin_unlock_irqrestore(&cmd->alloc_lock, flags); + + return ret < cmd->max_reg_cmds ? ret : -ENOMEM; +} + +static void free_ent(struct mlx5_cmd *cmd, int idx) +{ + unsigned long flags; + + spin_lock_irqsave(&cmd->alloc_lock, flags); + set_bit(idx, &cmd->bitmask); + spin_unlock_irqrestore(&cmd->alloc_lock, flags); +} + +static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) +{ + return cmd->cmd_buf + (idx << cmd->log_stride); +} + +static int mlx5_calc_cmd_blocks(struct mlx5_cmd_msg *msg) +{ + int size = msg->len; + int blen = size - min_t(int, sizeof(msg->first.data), size); + + return DIV_ROUND_UP(blen, MLX5_CMD_DATA_BLOCK_SIZE); +} + +static u8 xor8_buf(void *buf, size_t offset, int len) +{ + u8 *ptr = buf; + u8 sum = 0; + int i; + int end = len + offset; + + for (i = offset; i < end; i++) + sum ^= ptr[i]; + + return sum; +} + +static int verify_block_sig(struct mlx5_cmd_prot_block *block) +{ + size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); + int xor_len = sizeof(*block) - sizeof(block->data) - 1; + + if (xor8_buf(block, rsvd0_off, xor_len) != 0xff) + return -EINVAL; + + if (xor8_buf(block, 0, sizeof(*block)) != 0xff) + return -EINVAL; + + return 0; +} + +static void calc_block_sig(struct mlx5_cmd_prot_block *block) +{ + int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2; + size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); + + block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len); + block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1); +} + +static void calc_chain_sig(struct mlx5_cmd_msg *msg) +{ + struct mlx5_cmd_mailbox *next = msg->next; + int n = mlx5_calc_cmd_blocks(msg); + int i = 0; + + for (i = 0; i < n && next; i++) { + calc_block_sig(next->buf); + next = next->next; + } +} + +static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) +{ + ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay)); + if (csum) { + calc_chain_sig(ent->in); + calc_chain_sig(ent->out); + } +} + +static void poll_timeout(struct mlx5_cmd_work_ent *ent) +{ + unsigned long poll_end = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000); + u8 own; + + do { + own = READ_ONCE(ent->lay->status_own); + if (!(own & CMD_OWNER_HW)) { + ent->ret = 0; + return; + } + cond_resched(); + } while (time_before(jiffies, poll_end)); + + ent->ret = -ETIMEDOUT; +} + +static void free_cmd(struct mlx5_cmd_work_ent *ent) +{ + kfree(ent); +} + +static int verify_signature(struct mlx5_cmd_work_ent *ent) +{ + struct mlx5_cmd_mailbox *next = ent->out->next; + int n = mlx5_calc_cmd_blocks(ent->out); + int err; + u8 sig; + int i = 0; + + sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay)); + if (sig != 0xff) + return -EINVAL; + + for (i = 0; i < n && next; i++) { + err = verify_block_sig(next->buf); + if (err) + return err; + + next = next->next; + } + + return 0; +} + +static void dump_buf(void *buf, int size, int data_only, int offset) +{ + __be32 *p = buf; + int i; + + for (i = 0; i < size; i += 16) { + pr_debug("%03x: %08x %08x %08x %08x\n", offset, be32_to_cpu(p[0]), + be32_to_cpu(p[1]), be32_to_cpu(p[2]), + be32_to_cpu(p[3])); + p += 4; + offset += 16; + } + if (!data_only) + pr_debug("\n"); +} + +static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, + u32 *synd, u8 *status) +{ + *synd = 0; + *status = 0; + + switch (op) { + case MLX5_CMD_OP_TEARDOWN_HCA: + case MLX5_CMD_OP_DISABLE_HCA: + case MLX5_CMD_OP_MANAGE_PAGES: + case MLX5_CMD_OP_DESTROY_MKEY: + case MLX5_CMD_OP_DESTROY_EQ: + case MLX5_CMD_OP_DESTROY_CQ: + case MLX5_CMD_OP_DESTROY_QP: + case MLX5_CMD_OP_DESTROY_PSV: + case MLX5_CMD_OP_DESTROY_SRQ: + case MLX5_CMD_OP_DESTROY_XRC_SRQ: + case MLX5_CMD_OP_DESTROY_XRQ: + case MLX5_CMD_OP_DESTROY_DCT: + case MLX5_CMD_OP_DEALLOC_Q_COUNTER: + case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT: + case MLX5_CMD_OP_DEALLOC_PD: + case MLX5_CMD_OP_DEALLOC_UAR: + case MLX5_CMD_OP_DETACH_FROM_MCG: + case MLX5_CMD_OP_DEALLOC_XRCD: + case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY: + case MLX5_CMD_OP_DESTROY_LAG: + case MLX5_CMD_OP_DESTROY_VPORT_LAG: + case MLX5_CMD_OP_DESTROY_TIR: + case MLX5_CMD_OP_DESTROY_SQ: + case MLX5_CMD_OP_DESTROY_RQ: + case MLX5_CMD_OP_DESTROY_RMP: + case MLX5_CMD_OP_DESTROY_TIS: + case MLX5_CMD_OP_DESTROY_RQT: + case MLX5_CMD_OP_DESTROY_FLOW_TABLE: + case MLX5_CMD_OP_DESTROY_FLOW_GROUP: + case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: + case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_FPGA_DESTROY_QP: + case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: + return MLX5_CMD_STAT_OK; + + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_ADAPTER: + case MLX5_CMD_OP_INIT_HCA: + case MLX5_CMD_OP_ENABLE_HCA: + case MLX5_CMD_OP_QUERY_PAGES: + case MLX5_CMD_OP_SET_HCA_CAP: + case MLX5_CMD_OP_QUERY_ISSI: + case MLX5_CMD_OP_SET_ISSI: + case MLX5_CMD_OP_CREATE_MKEY: + case MLX5_CMD_OP_QUERY_MKEY: + case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: + case MLX5_CMD_OP_PAGE_FAULT_RESUME: + case MLX5_CMD_OP_CREATE_EQ: + case MLX5_CMD_OP_QUERY_EQ: + case MLX5_CMD_OP_GEN_EQE: + case MLX5_CMD_OP_CREATE_CQ: + case MLX5_CMD_OP_QUERY_CQ: + case MLX5_CMD_OP_MODIFY_CQ: + case MLX5_CMD_OP_CREATE_QP: + case MLX5_CMD_OP_RST2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_RTR2RTS_QP: + case MLX5_CMD_OP_RTS2RTS_QP: + case MLX5_CMD_OP_SQERR2RTS_QP: + case MLX5_CMD_OP_QUERY_QP: + case MLX5_CMD_OP_SQD_RTS_QP: + case MLX5_CMD_OP_INIT2INIT_QP: + case MLX5_CMD_OP_CREATE_PSV: + case MLX5_CMD_OP_CREATE_SRQ: + case MLX5_CMD_OP_QUERY_SRQ: + case MLX5_CMD_OP_ARM_RQ: + case MLX5_CMD_OP_CREATE_XRC_SRQ: + case MLX5_CMD_OP_QUERY_XRC_SRQ: + case MLX5_CMD_OP_ARM_XRC_SRQ: + case MLX5_CMD_OP_CREATE_XRQ: + case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_CREATE_DCT: + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_QUERY_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + case MLX5_CMD_OP_QUERY_VPORT_STATE: + case MLX5_CMD_OP_MODIFY_VPORT_STATE: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: + case MLX5_CMD_OP_SET_ROCE_ADDRESS: + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_HCA_VPORT_GID: + case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY: + case MLX5_CMD_OP_QUERY_VNIC_ENV: + case MLX5_CMD_OP_QUERY_VPORT_COUNTER: + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_SET_PP_RATE_LIMIT: + case MLX5_CMD_OP_QUERY_RATE_LIMIT: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT: + case MLX5_CMD_OP_ALLOC_PD: + case MLX5_CMD_OP_ALLOC_UAR: + case MLX5_CMD_OP_CONFIG_INT_MODERATION: + case MLX5_CMD_OP_ACCESS_REG: + case MLX5_CMD_OP_ATTACH_TO_MCG: + case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG: + case MLX5_CMD_OP_MAD_IFC: + case MLX5_CMD_OP_QUERY_MAD_DEMUX: + case MLX5_CMD_OP_SET_MAD_DEMUX: + case MLX5_CMD_OP_NOP: + case MLX5_CMD_OP_ALLOC_XRCD: + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_QUERY_CONG_STATUS: + case MLX5_CMD_OP_MODIFY_CONG_STATUS: + case MLX5_CMD_OP_QUERY_CONG_PARAMS: + case MLX5_CMD_OP_MODIFY_CONG_PARAMS: + case MLX5_CMD_OP_QUERY_CONG_STATISTICS: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_CREATE_LAG: + case MLX5_CMD_OP_MODIFY_LAG: + case MLX5_CMD_OP_QUERY_LAG: + case MLX5_CMD_OP_CREATE_VPORT_LAG: + case MLX5_CMD_OP_CREATE_TIR: + case MLX5_CMD_OP_MODIFY_TIR: + case MLX5_CMD_OP_QUERY_TIR: + case MLX5_CMD_OP_CREATE_SQ: + case MLX5_CMD_OP_MODIFY_SQ: + case MLX5_CMD_OP_QUERY_SQ: + case MLX5_CMD_OP_CREATE_RQ: + case MLX5_CMD_OP_MODIFY_RQ: + case MLX5_CMD_OP_QUERY_RQ: + case MLX5_CMD_OP_CREATE_RMP: + case MLX5_CMD_OP_MODIFY_RMP: + case MLX5_CMD_OP_QUERY_RMP: + case MLX5_CMD_OP_CREATE_TIS: + case MLX5_CMD_OP_MODIFY_TIS: + case MLX5_CMD_OP_QUERY_TIS: + case MLX5_CMD_OP_CREATE_RQT: + case MLX5_CMD_OP_MODIFY_RQT: + case MLX5_CMD_OP_QUERY_RQT: + + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_FPGA_CREATE_QP: + case MLX5_CMD_OP_FPGA_MODIFY_QP: + case MLX5_CMD_OP_FPGA_QUERY_QP: + case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS: + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + *status = MLX5_DRIVER_STATUS_ABORTED; + *synd = MLX5_DRIVER_SYND; + return -EIO; + default: + mlx5_core_err(dev, "Unknown FW command (%d)\n", op); + return -EINVAL; + } +} + +const char *mlx5_command_str(int command) +{ +#define MLX5_COMMAND_STR_CASE(__cmd) case MLX5_CMD_OP_ ## __cmd: return #__cmd + + switch (command) { + MLX5_COMMAND_STR_CASE(QUERY_HCA_CAP); + MLX5_COMMAND_STR_CASE(QUERY_ADAPTER); + MLX5_COMMAND_STR_CASE(INIT_HCA); + MLX5_COMMAND_STR_CASE(TEARDOWN_HCA); + MLX5_COMMAND_STR_CASE(ENABLE_HCA); + MLX5_COMMAND_STR_CASE(DISABLE_HCA); + MLX5_COMMAND_STR_CASE(QUERY_PAGES); + MLX5_COMMAND_STR_CASE(MANAGE_PAGES); + MLX5_COMMAND_STR_CASE(SET_HCA_CAP); + MLX5_COMMAND_STR_CASE(QUERY_ISSI); + MLX5_COMMAND_STR_CASE(SET_ISSI); + MLX5_COMMAND_STR_CASE(SET_DRIVER_VERSION); + MLX5_COMMAND_STR_CASE(CREATE_MKEY); + MLX5_COMMAND_STR_CASE(QUERY_MKEY); + MLX5_COMMAND_STR_CASE(DESTROY_MKEY); + MLX5_COMMAND_STR_CASE(QUERY_SPECIAL_CONTEXTS); + MLX5_COMMAND_STR_CASE(PAGE_FAULT_RESUME); + MLX5_COMMAND_STR_CASE(CREATE_EQ); + MLX5_COMMAND_STR_CASE(DESTROY_EQ); + MLX5_COMMAND_STR_CASE(QUERY_EQ); + MLX5_COMMAND_STR_CASE(GEN_EQE); + MLX5_COMMAND_STR_CASE(CREATE_CQ); + MLX5_COMMAND_STR_CASE(DESTROY_CQ); + MLX5_COMMAND_STR_CASE(QUERY_CQ); + MLX5_COMMAND_STR_CASE(MODIFY_CQ); + MLX5_COMMAND_STR_CASE(CREATE_QP); + MLX5_COMMAND_STR_CASE(DESTROY_QP); + MLX5_COMMAND_STR_CASE(RST2INIT_QP); + MLX5_COMMAND_STR_CASE(INIT2RTR_QP); + MLX5_COMMAND_STR_CASE(RTR2RTS_QP); + MLX5_COMMAND_STR_CASE(RTS2RTS_QP); + MLX5_COMMAND_STR_CASE(SQERR2RTS_QP); + MLX5_COMMAND_STR_CASE(2ERR_QP); + MLX5_COMMAND_STR_CASE(2RST_QP); + MLX5_COMMAND_STR_CASE(QUERY_QP); + MLX5_COMMAND_STR_CASE(SQD_RTS_QP); + MLX5_COMMAND_STR_CASE(INIT2INIT_QP); + MLX5_COMMAND_STR_CASE(CREATE_PSV); + MLX5_COMMAND_STR_CASE(DESTROY_PSV); + MLX5_COMMAND_STR_CASE(CREATE_SRQ); + MLX5_COMMAND_STR_CASE(DESTROY_SRQ); + MLX5_COMMAND_STR_CASE(QUERY_SRQ); + MLX5_COMMAND_STR_CASE(ARM_RQ); + MLX5_COMMAND_STR_CASE(CREATE_XRC_SRQ); + MLX5_COMMAND_STR_CASE(DESTROY_XRC_SRQ); + MLX5_COMMAND_STR_CASE(QUERY_XRC_SRQ); + MLX5_COMMAND_STR_CASE(ARM_XRC_SRQ); + MLX5_COMMAND_STR_CASE(CREATE_DCT); + MLX5_COMMAND_STR_CASE(DESTROY_DCT); + MLX5_COMMAND_STR_CASE(DRAIN_DCT); + MLX5_COMMAND_STR_CASE(QUERY_DCT); + MLX5_COMMAND_STR_CASE(ARM_DCT_FOR_KEY_VIOLATION); + MLX5_COMMAND_STR_CASE(QUERY_VPORT_STATE); + MLX5_COMMAND_STR_CASE(MODIFY_VPORT_STATE); + MLX5_COMMAND_STR_CASE(QUERY_ESW_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_ESW_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_NIC_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_NIC_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_ROCE_ADDRESS); + MLX5_COMMAND_STR_CASE(SET_ROCE_ADDRESS); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY); + MLX5_COMMAND_STR_CASE(QUERY_VNIC_ENV); + MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER); + MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); + MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); + MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); + MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(ALLOC_PD); + MLX5_COMMAND_STR_CASE(DEALLOC_PD); + MLX5_COMMAND_STR_CASE(ALLOC_UAR); + MLX5_COMMAND_STR_CASE(DEALLOC_UAR); + MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION); + MLX5_COMMAND_STR_CASE(ACCESS_REG); + MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG); + MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG); + MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG); + MLX5_COMMAND_STR_CASE(MAD_IFC); + MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX); + MLX5_COMMAND_STR_CASE(SET_MAD_DEMUX); + MLX5_COMMAND_STR_CASE(NOP); + MLX5_COMMAND_STR_CASE(ALLOC_XRCD); + MLX5_COMMAND_STR_CASE(DEALLOC_XRCD); + MLX5_COMMAND_STR_CASE(ALLOC_TRANSPORT_DOMAIN); + MLX5_COMMAND_STR_CASE(DEALLOC_TRANSPORT_DOMAIN); + MLX5_COMMAND_STR_CASE(QUERY_CONG_STATUS); + MLX5_COMMAND_STR_CASE(MODIFY_CONG_STATUS); + MLX5_COMMAND_STR_CASE(QUERY_CONG_PARAMS); + MLX5_COMMAND_STR_CASE(MODIFY_CONG_PARAMS); + MLX5_COMMAND_STR_CASE(QUERY_CONG_STATISTICS); + MLX5_COMMAND_STR_CASE(ADD_VXLAN_UDP_DPORT); + MLX5_COMMAND_STR_CASE(DELETE_VXLAN_UDP_DPORT); + MLX5_COMMAND_STR_CASE(SET_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(QUERY_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(SET_WOL_ROL); + MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL); + MLX5_COMMAND_STR_CASE(CREATE_LAG); + MLX5_COMMAND_STR_CASE(MODIFY_LAG); + MLX5_COMMAND_STR_CASE(QUERY_LAG); + MLX5_COMMAND_STR_CASE(DESTROY_LAG); + MLX5_COMMAND_STR_CASE(CREATE_VPORT_LAG); + MLX5_COMMAND_STR_CASE(DESTROY_VPORT_LAG); + MLX5_COMMAND_STR_CASE(CREATE_TIR); + MLX5_COMMAND_STR_CASE(MODIFY_TIR); + MLX5_COMMAND_STR_CASE(DESTROY_TIR); + MLX5_COMMAND_STR_CASE(QUERY_TIR); + MLX5_COMMAND_STR_CASE(CREATE_SQ); + MLX5_COMMAND_STR_CASE(MODIFY_SQ); + MLX5_COMMAND_STR_CASE(DESTROY_SQ); + MLX5_COMMAND_STR_CASE(QUERY_SQ); + MLX5_COMMAND_STR_CASE(CREATE_RQ); + MLX5_COMMAND_STR_CASE(MODIFY_RQ); + MLX5_COMMAND_STR_CASE(DESTROY_RQ); + MLX5_COMMAND_STR_CASE(QUERY_RQ); + MLX5_COMMAND_STR_CASE(CREATE_RMP); + MLX5_COMMAND_STR_CASE(MODIFY_RMP); + MLX5_COMMAND_STR_CASE(DESTROY_RMP); + MLX5_COMMAND_STR_CASE(QUERY_RMP); + MLX5_COMMAND_STR_CASE(CREATE_TIS); + MLX5_COMMAND_STR_CASE(MODIFY_TIS); + MLX5_COMMAND_STR_CASE(DESTROY_TIS); + MLX5_COMMAND_STR_CASE(QUERY_TIS); + MLX5_COMMAND_STR_CASE(CREATE_RQT); + MLX5_COMMAND_STR_CASE(MODIFY_RQT); + MLX5_COMMAND_STR_CASE(DESTROY_RQT); + MLX5_COMMAND_STR_CASE(QUERY_RQT); + MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ROOT); + MLX5_COMMAND_STR_CASE(CREATE_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(DESTROY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(CREATE_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(DESTROY_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(DELETE_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER); + MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); + MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP); + MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP); + MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP); + MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS); + MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP); + MLX5_COMMAND_STR_CASE(CREATE_XRQ); + MLX5_COMMAND_STR_CASE(DESTROY_XRQ); + MLX5_COMMAND_STR_CASE(QUERY_XRQ); + MLX5_COMMAND_STR_CASE(ARM_XRQ); + MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); + default: return "unknown command opcode"; + } +} + +static const char *cmd_status_str(u8 status) +{ + switch (status) { + case MLX5_CMD_STAT_OK: + return "OK"; + case MLX5_CMD_STAT_INT_ERR: + return "internal error"; + case MLX5_CMD_STAT_BAD_OP_ERR: + return "bad operation"; + case MLX5_CMD_STAT_BAD_PARAM_ERR: + return "bad parameter"; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: + return "bad system state"; + case MLX5_CMD_STAT_BAD_RES_ERR: + return "bad resource"; + case MLX5_CMD_STAT_RES_BUSY: + return "resource busy"; + case MLX5_CMD_STAT_LIM_ERR: + return "limits exceeded"; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: + return "bad resource state"; + case MLX5_CMD_STAT_IX_ERR: + return "bad index"; + case MLX5_CMD_STAT_NO_RES_ERR: + return "no resources"; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: + return "bad input length"; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: + return "bad output length"; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: + return "bad QP state"; + case MLX5_CMD_STAT_BAD_PKT_ERR: + return "bad packet (discarded)"; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: + return "bad size too many outstanding CQEs"; + default: + return "unknown status"; + } +} + +static int cmd_status_to_err(u8 status) +{ + switch (status) { + case MLX5_CMD_STAT_OK: return 0; + case MLX5_CMD_STAT_INT_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL; + case MLX5_CMD_STAT_RES_BUSY: return -EBUSY; + case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_IX_ERR: return -EINVAL; + case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL; + default: return -EIO; + } +} + +struct mlx5_ifc_mbox_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_mbox_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome) +{ + *status = MLX5_GET(mbox_out, out, status); + *syndrome = MLX5_GET(mbox_out, out, syndrome); +} + +static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out) +{ + u32 syndrome; + u8 status; + u16 opcode; + u16 op_mod; + u16 uid; + + mlx5_cmd_mbox_status(out, &status, &syndrome); + if (!status) + return 0; + + opcode = MLX5_GET(mbox_in, in, opcode); + op_mod = MLX5_GET(mbox_in, in, op_mod); + uid = MLX5_GET(mbox_in, in, uid); + + if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY) + mlx5_core_err_rl(dev, + "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n", + mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome); + else + mlx5_core_dbg(dev, + "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n", + mlx5_command_str(opcode), + opcode, op_mod, + cmd_status_str(status), + status, + syndrome); + + return cmd_status_to_err(status); +} + +static void dump_command(struct mlx5_core_dev *dev, + struct mlx5_cmd_work_ent *ent, int input) +{ + struct mlx5_cmd_msg *msg = input ? ent->in : ent->out; + u16 op = MLX5_GET(mbox_in, ent->lay->in, opcode); + struct mlx5_cmd_mailbox *next = msg->next; + int n = mlx5_calc_cmd_blocks(msg); + int data_only; + u32 offset = 0; + int dump_len; + int i; + + data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA)); + + if (data_only) + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA, + "dump command data %s(0x%x) %s\n", + mlx5_command_str(op), op, + input ? "INPUT" : "OUTPUT"); + else + mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n", + mlx5_command_str(op), op, + input ? "INPUT" : "OUTPUT"); + + if (data_only) { + if (input) { + dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset); + offset += sizeof(ent->lay->in); + } else { + dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset); + offset += sizeof(ent->lay->out); + } + } else { + dump_buf(ent->lay, sizeof(*ent->lay), 0, offset); + offset += sizeof(*ent->lay); + } + + for (i = 0; i < n && next; i++) { + if (data_only) { + dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset); + dump_buf(next->buf, dump_len, 1, offset); + offset += MLX5_CMD_DATA_BLOCK_SIZE; + } else { + mlx5_core_dbg(dev, "command block:\n"); + dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset); + offset += sizeof(struct mlx5_cmd_prot_block); + } + next = next->next; + } + + if (data_only) + pr_debug("\n"); +} + +static u16 msg_to_opcode(struct mlx5_cmd_msg *in) +{ + return MLX5_GET(mbox_in, in->first.data, opcode); +} + +static void cb_timeout_handler(struct work_struct *work) +{ + struct delayed_work *dwork = container_of(work, struct delayed_work, + work); + struct mlx5_cmd_work_ent *ent = container_of(dwork, + struct mlx5_cmd_work_ent, + cb_timeout_work); + struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, + cmd); + + ent->ret = -ETIMEDOUT; + mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); +} + +static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); +static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, + struct mlx5_cmd_msg *msg); + +static void cmd_work_handler(struct work_struct *work) +{ + struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); + struct mlx5_cmd *cmd = ent->cmd; + struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); + unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); + struct mlx5_cmd_layout *lay; + struct semaphore *sem; + unsigned long flags; + bool poll_cmd = ent->polling; + int alloc_ret; + int cmd_mode; + + complete(&ent->handling); + sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; + down(sem); + if (!ent->page_queue) { + alloc_ret = alloc_ent(cmd); + if (alloc_ret < 0) { + mlx5_core_err(dev, "failed to allocate command entry\n"); + if (ent->callback) { + ent->callback(-EAGAIN, ent->context); + mlx5_free_cmd_msg(dev, ent->out); + free_msg(dev, ent->in); + free_cmd(ent); + } else { + ent->ret = -EAGAIN; + complete(&ent->done); + } + up(sem); + return; + } + ent->idx = alloc_ret; + } else { + ent->idx = cmd->max_reg_cmds; + spin_lock_irqsave(&cmd->alloc_lock, flags); + clear_bit(ent->idx, &cmd->bitmask); + spin_unlock_irqrestore(&cmd->alloc_lock, flags); + } + + cmd->ent_arr[ent->idx] = ent; + lay = get_inst(cmd, ent->idx); + ent->lay = lay; + memset(lay, 0, sizeof(*lay)); + memcpy(lay->in, ent->in->first.data, sizeof(lay->in)); + ent->op = be32_to_cpu(lay->in[0]) >> 16; + if (ent->in->next) + lay->in_ptr = cpu_to_be64(ent->in->next->dma); + lay->inlen = cpu_to_be32(ent->in->len); + if (ent->out->next) + lay->out_ptr = cpu_to_be64(ent->out->next->dma); + lay->outlen = cpu_to_be32(ent->out->len); + lay->type = MLX5_PCI_CMD_XPORT; + lay->token = ent->token; + lay->status_own = CMD_OWNER_HW; + set_signature(ent, !cmd->checksum_disabled); + dump_command(dev, ent, 1); + ent->ts1 = ktime_get_ns(); + cmd_mode = cmd->mode; + + if (ent->callback) + schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); + set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); + + /* Skip sending command to fw if internal error */ + if (pci_channel_offline(dev->pdev) || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + u8 status = 0; + u32 drv_synd; + + ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status); + MLX5_SET(mbox_out, ent->out, status, status); + MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); + + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + /* no doorbell, no need to keep the entry */ + free_ent(cmd, ent->idx); + if (ent->callback) + free_cmd(ent); + return; + } + + /* ring doorbell after the descriptor is valid */ + mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); + wmb(); + iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); + mmiowb(); + /* if not in polling don't use ent after this point */ + if (cmd_mode == CMD_MODE_POLLING || poll_cmd) { + poll_timeout(ent); + /* make sure we read the descriptor after ownership is SW */ + rmb(); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT)); + } +} + +static const char *deliv_status_to_str(u8 status) +{ + switch (status) { + case MLX5_CMD_DELIVERY_STAT_OK: + return "no errors"; + case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: + return "signature error"; + case MLX5_CMD_DELIVERY_STAT_TOK_ERR: + return "token error"; + case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: + return "bad block number"; + case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: + return "output pointer not aligned to block size"; + case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: + return "input pointer not aligned to block size"; + case MLX5_CMD_DELIVERY_STAT_FW_ERR: + return "firmware internal error"; + case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: + return "command input length error"; + case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: + return "command output length error"; + case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: + return "reserved fields not cleared"; + case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: + return "bad command descriptor type"; + default: + return "unknown status code"; + } +} + +static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) +{ + unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); + struct mlx5_cmd *cmd = &dev->cmd; + int err; + + if (!wait_for_completion_timeout(&ent->handling, timeout) && + cancel_work_sync(&ent->work)) { + ent->ret = -ECANCELED; + goto out_err; + } + if (cmd->mode == CMD_MODE_POLLING || ent->polling) { + wait_for_completion(&ent->done); + } else if (!wait_for_completion_timeout(&ent->done, timeout)) { + ent->ret = -ETIMEDOUT; + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + } + +out_err: + err = ent->ret; + + if (err == -ETIMEDOUT) { + mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); + } else if (err == -ECANCELED) { + mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); + } + mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", + err, deliv_status_to_str(ent->status), ent->status); + + return err; +} + +/* Notes: + * 1. Callback functions may not sleep + * 2. page queue commands do not support asynchrous completion + */ +static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, void *uout, int uout_size, + mlx5_cmd_cbk_t callback, + void *context, int page_queue, u8 *status, + u8 token, bool force_polling) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent *ent; + struct mlx5_cmd_stats *stats; + int err = 0; + s64 ds; + u16 op; + + if (callback && page_queue) + return -EINVAL; + + ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context, + page_queue); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + ent->token = token; + ent->polling = force_polling; + + init_completion(&ent->handling); + if (!callback) + init_completion(&ent->done); + + INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler); + INIT_WORK(&ent->work, cmd_work_handler); + if (page_queue) { + cmd_work_handler(&ent->work); + } else if (!queue_work(cmd->wq, &ent->work)) { + mlx5_core_warn(dev, "failed to queue work\n"); + err = -ENOMEM; + goto out_free; + } + + if (callback) + goto out; + + err = wait_func(dev, ent); + if (err == -ETIMEDOUT) + goto out; + if (err == -ECANCELED) + goto out_free; + + ds = ent->ts2 - ent->ts1; + op = MLX5_GET(mbox_in, in->first.data, opcode); + if (op < ARRAY_SIZE(cmd->stats)) { + stats = &cmd->stats[op]; + spin_lock_irq(&stats->lock); + stats->sum += ds; + ++stats->n; + spin_unlock_irq(&stats->lock); + } + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, + "fw exec time for %s is %lld nsec\n", + mlx5_command_str(op), ds); + *status = ent->status; + +out_free: + free_cmd(ent); +out: + return err; +} + +static ssize_t dbg_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char lbuf[3]; + int err; + + if (!dbg->in_msg || !dbg->out_msg) + return -ENOMEM; + + if (count < sizeof(lbuf) - 1) + return -EINVAL; + + if (copy_from_user(lbuf, buf, sizeof(lbuf) - 1)) + return -EFAULT; + + lbuf[sizeof(lbuf) - 1] = 0; + + if (strcmp(lbuf, "go")) + return -EINVAL; + + err = mlx5_cmd_exec(dev, dbg->in_msg, dbg->inlen, dbg->out_msg, dbg->outlen); + + return err ? err : count; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = dbg_write, +}; + +static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size, + u8 token) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_mailbox *next; + int copy; + + if (!to || !from) + return -ENOMEM; + + copy = min_t(int, size, sizeof(to->first.data)); + memcpy(to->first.data, from, copy); + size -= copy; + from += copy; + + next = to->next; + while (size) { + if (!next) { + /* this is a BUG */ + return -ENOMEM; + } + + copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); + block = next->buf; + memcpy(block->data, from, copy); + from += copy; + size -= copy; + block->token = token; + next = next->next; + } + + return 0; +} + +static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_mailbox *next; + int copy; + + if (!to || !from) + return -ENOMEM; + + copy = min_t(int, size, sizeof(from->first.data)); + memcpy(to, from->first.data, copy); + size -= copy; + to += copy; + + next = from->next; + while (size) { + if (!next) { + /* this is a BUG */ + return -ENOMEM; + } + + copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); + block = next->buf; + + memcpy(to, block->data, copy); + to += copy; + size -= copy; + next = next->next; + } + + return 0; +} + +static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev, + gfp_t flags) +{ + struct mlx5_cmd_mailbox *mailbox; + + mailbox = kmalloc(sizeof(*mailbox), flags); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = dma_pool_zalloc(dev->cmd.pool, flags, + &mailbox->dma); + if (!mailbox->buf) { + mlx5_core_dbg(dev, "failed allocation\n"); + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + mailbox->next = NULL; + + return mailbox; +} + +static void free_cmd_box(struct mlx5_core_dev *dev, + struct mlx5_cmd_mailbox *mailbox) +{ + dma_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} + +static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, + gfp_t flags, int size, + u8 token) +{ + struct mlx5_cmd_mailbox *tmp, *head = NULL; + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_msg *msg; + int err; + int n; + int i; + + msg = kzalloc(sizeof(*msg), flags); + if (!msg) + return ERR_PTR(-ENOMEM); + + msg->len = size; + n = mlx5_calc_cmd_blocks(msg); + + for (i = 0; i < n; i++) { + tmp = alloc_cmd_box(dev, flags); + if (IS_ERR(tmp)) { + mlx5_core_warn(dev, "failed allocating block\n"); + err = PTR_ERR(tmp); + goto err_alloc; + } + + block = tmp->buf; + tmp->next = head; + block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0); + block->block_num = cpu_to_be32(n - i - 1); + block->token = token; + head = tmp; + } + msg->next = head; + return msg; + +err_alloc: + while (head) { + tmp = head->next; + free_cmd_box(dev, head); + head = tmp; + } + kfree(msg); + + return ERR_PTR(err); +} + +static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, + struct mlx5_cmd_msg *msg) +{ + struct mlx5_cmd_mailbox *head = msg->next; + struct mlx5_cmd_mailbox *next; + + while (head) { + next = head->next; + free_cmd_box(dev, head); + head = next; + } + kfree(msg); +} + +static ssize_t data_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + void *ptr; + + if (*pos != 0) + return -EINVAL; + + kfree(dbg->in_msg); + dbg->in_msg = NULL; + dbg->inlen = 0; + ptr = memdup_user(buf, count); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + dbg->in_msg = ptr; + dbg->inlen = count; + + *pos = count; + + return count; +} + +static ssize_t data_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + + if (!dbg->out_msg) + return -ENOMEM; + + return simple_read_from_buffer(buf, count, pos, dbg->out_msg, + dbg->outlen); +} + +static const struct file_operations dfops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = data_write, + .read = data_read, +}; + +static ssize_t outlen_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char outlen[8]; + int err; + + err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen); + if (err < 0) + return err; + + return simple_read_from_buffer(buf, count, pos, outlen, err); +} + +static ssize_t outlen_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char outlen_str[8] = {0}; + int outlen; + void *ptr; + int err; + + if (*pos != 0 || count > 6) + return -EINVAL; + + kfree(dbg->out_msg); + dbg->out_msg = NULL; + dbg->outlen = 0; + + if (copy_from_user(outlen_str, buf, count)) + return -EFAULT; + + err = sscanf(outlen_str, "%d", &outlen); + if (err < 0) + return err; + + ptr = kzalloc(outlen, GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + dbg->out_msg = ptr; + dbg->outlen = outlen; + + *pos = count; + + return count; +} + +static const struct file_operations olfops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = outlen_write, + .read = outlen_read, +}; + +static void set_wqname(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + + snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s", + dev_name(&dev->pdev->dev)); +} + +static void clean_debug_files(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + + if (!mlx5_debugfs_root) + return; + + mlx5_cmdif_debugfs_cleanup(dev); + debugfs_remove_recursive(dbg->dbg_root); +} + +static int create_debugfs_files(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + int err = -ENOMEM; + + if (!mlx5_debugfs_root) + return 0; + + dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root); + if (!dbg->dbg_root) + return err; + + dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root, + dev, &dfops); + if (!dbg->dbg_in) + goto err_dbg; + + dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root, + dev, &dfops); + if (!dbg->dbg_out) + goto err_dbg; + + dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root, + dev, &olfops); + if (!dbg->dbg_outlen) + goto err_dbg; + + dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root, + &dbg->status); + if (!dbg->dbg_status) + goto err_dbg; + + dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops); + if (!dbg->dbg_run) + goto err_dbg; + + mlx5_cmdif_debugfs_init(dev); + + return 0; + +err_dbg: + clean_debug_files(dev); + return err; +} + +static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->max_reg_cmds; i++) + down(&cmd->sem); + down(&cmd->pages_sem); + + cmd->mode = mode; + + up(&cmd->pages_sem); + for (i = 0; i < cmd->max_reg_cmds; i++) + up(&cmd->sem); +} + +void mlx5_cmd_use_events(struct mlx5_core_dev *dev) +{ + mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS); +} + +void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +{ + mlx5_cmd_change_mod(dev, CMD_MODE_POLLING); +} + +static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) +{ + unsigned long flags; + + if (msg->parent) { + spin_lock_irqsave(&msg->parent->lock, flags); + list_add_tail(&msg->list, &msg->parent->head); + spin_unlock_irqrestore(&msg->parent->lock, flags); + } else { + mlx5_free_cmd_msg(dev, msg); + } +} + +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent *ent; + mlx5_cmd_cbk_t callback; + void *context; + int err; + int i; + s64 ds; + struct mlx5_cmd_stats *stats; + unsigned long flags; + unsigned long vector; + + /* there can be at most 32 command queues */ + vector = vec & 0xffffffff; + for (i = 0; i < (1 << cmd->log_sz); i++) { + if (test_bit(i, &vector)) { + struct semaphore *sem; + + ent = cmd->ent_arr[i]; + + /* if we already completed the command, ignore it */ + if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, + &ent->state)) { + /* only real completion can free the cmd slot */ + if (!forced) { + mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", + ent->idx); + free_ent(cmd, ent->idx); + free_cmd(ent); + } + continue; + } + + if (ent->callback) + cancel_delayed_work(&ent->cb_timeout_work); + if (ent->page_queue) + sem = &cmd->pages_sem; + else + sem = &cmd->sem; + ent->ts2 = ktime_get_ns(); + memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); + dump_command(dev, ent, 0); + if (!ent->ret) { + if (!cmd->checksum_disabled) + ent->ret = verify_signature(ent); + else + ent->ret = 0; + if (vec & MLX5_TRIGGERED_CMD_COMP) + ent->status = MLX5_DRIVER_STATUS_ABORTED; + else + ent->status = ent->lay->status_own >> 1; + + mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", + ent->ret, deliv_status_to_str(ent->status), ent->status); + } + + /* only real completion will free the entry slot */ + if (!forced) + free_ent(cmd, ent->idx); + + if (ent->callback) { + ds = ent->ts2 - ent->ts1; + if (ent->op < ARRAY_SIZE(cmd->stats)) { + stats = &cmd->stats[ent->op]; + spin_lock_irqsave(&stats->lock, flags); + stats->sum += ds; + ++stats->n; + spin_unlock_irqrestore(&stats->lock, flags); + } + + callback = ent->callback; + context = ent->context; + err = ent->ret; + if (!err) { + err = mlx5_copy_from_msg(ent->uout, + ent->out, + ent->uout_size); + + err = err ? err : mlx5_cmd_check(dev, + ent->in->first.data, + ent->uout); + } + + mlx5_free_cmd_msg(dev, ent->out); + free_msg(dev, ent->in); + + err = err ? err : ent->status; + if (!forced) + free_cmd(ent); + callback(err, context); + } else { + complete(&ent->done); + } + up(sem); + } + } +} +EXPORT_SYMBOL(mlx5_cmd_comp_handler); + +static int status_to_err(u8 status) +{ + return status ? -1 : 0; /* TBD more meaningful codes */ +} + +static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, + gfp_t gfp) +{ + struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM); + struct cmd_msg_cache *ch = NULL; + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + if (in_size <= 16) + goto cache_miss; + + for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) { + ch = &cmd->cache[i]; + if (in_size > ch->max_inbox_size) + continue; + spin_lock_irq(&ch->lock); + if (list_empty(&ch->head)) { + spin_unlock_irq(&ch->lock); + continue; + } + msg = list_entry(ch->head.next, typeof(*msg), list); + /* For cached lists, we must explicitly state what is + * the real size + */ + msg->len = in_size; + list_del(&msg->list); + spin_unlock_irq(&ch->lock); + break; + } + + if (!IS_ERR(msg)) + return msg; + +cache_miss: + msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0); + return msg; +} + +static int is_manage_pages(void *in) +{ + return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES; +} + +static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size, mlx5_cmd_cbk_t callback, void *context, + bool force_polling) +{ + struct mlx5_cmd_msg *inb; + struct mlx5_cmd_msg *outb; + int pages_queue; + gfp_t gfp; + int err; + u8 status = 0; + u32 drv_synd; + u8 token; + + if (pci_channel_offline(dev->pdev) || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + u16 opcode = MLX5_GET(mbox_in, in, opcode); + + err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); + MLX5_SET(mbox_out, out, status, status); + MLX5_SET(mbox_out, out, syndrome, drv_synd); + return err; + } + + pages_queue = is_manage_pages(in); + gfp = callback ? GFP_ATOMIC : GFP_KERNEL; + + inb = alloc_msg(dev, in_size, gfp); + if (IS_ERR(inb)) { + err = PTR_ERR(inb); + return err; + } + + token = alloc_token(&dev->cmd); + + err = mlx5_copy_to_msg(inb, in, in_size, token); + if (err) { + mlx5_core_warn(dev, "err %d\n", err); + goto out_in; + } + + outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token); + if (IS_ERR(outb)) { + err = PTR_ERR(outb); + goto out_in; + } + + err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, + pages_queue, &status, token, force_polling); + if (err) + goto out_out; + + mlx5_core_dbg(dev, "err %d, status %d\n", err, status); + if (status) { + err = status_to_err(status); + goto out_out; + } + + if (!callback) + err = mlx5_copy_from_msg(out, outb, out_size); + +out_out: + if (!callback) + mlx5_free_cmd_msg(dev, outb); + +out_in: + if (!callback) + free_msg(dev, inb); + return err; +} + +int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size) +{ + int err; + + err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); + return err ? : mlx5_cmd_check(dev, in, out); +} +EXPORT_SYMBOL(mlx5_cmd_exec); + +int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, + void *out, int out_size, mlx5_cmd_cbk_t callback, + void *context) +{ + return cmd_exec(dev, in, in_size, out, out_size, callback, context, + false); +} +EXPORT_SYMBOL(mlx5_cmd_exec_cb); + +int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, + void *out, int out_size) +{ + int err; + + err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); + + return err ? : mlx5_cmd_check(dev, in, out); +} +EXPORT_SYMBOL(mlx5_cmd_exec_polling); + +static void destroy_msg_cache(struct mlx5_core_dev *dev) +{ + struct cmd_msg_cache *ch; + struct mlx5_cmd_msg *msg; + struct mlx5_cmd_msg *n; + int i; + + for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) { + ch = &dev->cmd.cache[i]; + list_for_each_entry_safe(msg, n, &ch->head, list) { + list_del(&msg->list); + mlx5_free_cmd_msg(dev, msg); + } + } +} + +static unsigned cmd_cache_num_ent[MLX5_NUM_COMMAND_CACHES] = { + 512, 32, 16, 8, 2 +}; + +static unsigned cmd_cache_ent_size[MLX5_NUM_COMMAND_CACHES] = { + 16 + MLX5_CMD_DATA_BLOCK_SIZE, + 16 + MLX5_CMD_DATA_BLOCK_SIZE * 2, + 16 + MLX5_CMD_DATA_BLOCK_SIZE * 16, + 16 + MLX5_CMD_DATA_BLOCK_SIZE * 256, + 16 + MLX5_CMD_DATA_BLOCK_SIZE * 512, +}; + +static void create_msg_cache(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct cmd_msg_cache *ch; + struct mlx5_cmd_msg *msg; + int i; + int k; + + /* Initialize and fill the caches with initial entries */ + for (k = 0; k < MLX5_NUM_COMMAND_CACHES; k++) { + ch = &cmd->cache[k]; + spin_lock_init(&ch->lock); + INIT_LIST_HEAD(&ch->head); + ch->num_ent = cmd_cache_num_ent[k]; + ch->max_inbox_size = cmd_cache_ent_size[k]; + for (i = 0; i < ch->num_ent; i++) { + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL | __GFP_NOWARN, + ch->max_inbox_size, 0); + if (IS_ERR(msg)) + break; + msg->parent = ch; + list_add_tail(&msg->list, &ch->head); + } + } +} + +static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) +{ + struct device *ddev = &dev->pdev->dev; + + cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, + &cmd->alloc_dma, GFP_KERNEL); + if (!cmd->cmd_alloc_buf) + return -ENOMEM; + + /* make sure it is aligned to 4K */ + if (!((uintptr_t)cmd->cmd_alloc_buf & (MLX5_ADAPTER_PAGE_SIZE - 1))) { + cmd->cmd_buf = cmd->cmd_alloc_buf; + cmd->dma = cmd->alloc_dma; + cmd->alloc_size = MLX5_ADAPTER_PAGE_SIZE; + return 0; + } + + dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf, + cmd->alloc_dma); + cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, + 2 * MLX5_ADAPTER_PAGE_SIZE - 1, + &cmd->alloc_dma, GFP_KERNEL); + if (!cmd->cmd_alloc_buf) + return -ENOMEM; + + cmd->cmd_buf = PTR_ALIGN(cmd->cmd_alloc_buf, MLX5_ADAPTER_PAGE_SIZE); + cmd->dma = ALIGN(cmd->alloc_dma, MLX5_ADAPTER_PAGE_SIZE); + cmd->alloc_size = 2 * MLX5_ADAPTER_PAGE_SIZE - 1; + return 0; +} + +static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) +{ + struct device *ddev = &dev->pdev->dev; + + dma_free_coherent(ddev, cmd->alloc_size, cmd->cmd_alloc_buf, + cmd->alloc_dma); +} + +int mlx5_cmd_init(struct mlx5_core_dev *dev) +{ + int size = sizeof(struct mlx5_cmd_prot_block); + int align = roundup_pow_of_two(size); + struct mlx5_cmd *cmd = &dev->cmd; + u32 cmd_h, cmd_l; + u16 cmd_if_rev; + int err; + int i; + + memset(cmd, 0, sizeof(*cmd)); + cmd_if_rev = cmdif_rev(dev); + if (cmd_if_rev != CMD_IF_REV) { + dev_err(&dev->pdev->dev, + "Driver cmdif rev(%d) differs from firmware's(%d)\n", + CMD_IF_REV, cmd_if_rev); + return -EINVAL; + } + + cmd->pool = dma_pool_create("mlx5_cmd", &dev->pdev->dev, size, align, + 0); + if (!cmd->pool) + return -ENOMEM; + + err = alloc_cmd_page(dev, cmd); + if (err) + goto err_free_pool; + + cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff; + cmd->log_sz = cmd_l >> 4 & 0xf; + cmd->log_stride = cmd_l & 0xf; + if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) { + dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n", + 1 << cmd->log_sz); + err = -EINVAL; + goto err_free_page; + } + + if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) { + dev_err(&dev->pdev->dev, "command queue size overflow\n"); + err = -EINVAL; + goto err_free_page; + } + + cmd->checksum_disabled = 1; + cmd->max_reg_cmds = (1 << cmd->log_sz) - 1; + cmd->bitmask = (1UL << cmd->max_reg_cmds) - 1; + + cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; + if (cmd->cmdif_rev > CMD_IF_REV) { + dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n", + CMD_IF_REV, cmd->cmdif_rev); + err = -EOPNOTSUPP; + goto err_free_page; + } + + spin_lock_init(&cmd->alloc_lock); + spin_lock_init(&cmd->token_lock); + for (i = 0; i < ARRAY_SIZE(cmd->stats); i++) + spin_lock_init(&cmd->stats[i].lock); + + sema_init(&cmd->sem, cmd->max_reg_cmds); + sema_init(&cmd->pages_sem, 1); + + cmd_h = (u32)((u64)(cmd->dma) >> 32); + cmd_l = (u32)(cmd->dma); + if (cmd_l & 0xfff) { + dev_err(&dev->pdev->dev, "invalid command queue address\n"); + err = -ENOMEM; + goto err_free_page; + } + + iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h); + iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz); + + /* Make sure firmware sees the complete address before we proceed */ + wmb(); + + mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma)); + + cmd->mode = CMD_MODE_POLLING; + + create_msg_cache(dev); + + set_wqname(dev); + cmd->wq = create_singlethread_workqueue(cmd->wq_name); + if (!cmd->wq) { + dev_err(&dev->pdev->dev, "failed to create command workqueue\n"); + err = -ENOMEM; + goto err_cache; + } + + err = create_debugfs_files(dev); + if (err) { + err = -ENOMEM; + goto err_wq; + } + + return 0; + +err_wq: + destroy_workqueue(cmd->wq); + +err_cache: + destroy_msg_cache(dev); + +err_free_page: + free_cmd_page(dev, cmd); + +err_free_pool: + dma_pool_destroy(cmd->pool); + + return err; +} +EXPORT_SYMBOL(mlx5_cmd_init); + +void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + + clean_debug_files(dev); + destroy_workqueue(cmd->wq); + destroy_msg_cache(dev); + free_cmd_page(dev, cmd); + dma_pool_destroy(cmd->pool); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c new file mode 100644 index 000000000..a4179122a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/hardirq.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include <rdma/ib_verbs.h> +#include <linux/mlx5/cq.h> +#include "mlx5_core.h" + +#define TASKLET_MAX_TIME 2 +#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) + +void mlx5_cq_tasklet_cb(unsigned long data) +{ + unsigned long flags; + unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES; + struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data; + struct mlx5_core_cq *mcq; + struct mlx5_core_cq *temp; + + spin_lock_irqsave(&ctx->lock, flags); + list_splice_tail_init(&ctx->list, &ctx->process_list); + spin_unlock_irqrestore(&ctx->lock, flags); + + list_for_each_entry_safe(mcq, temp, &ctx->process_list, + tasklet_ctx.list) { + list_del_init(&mcq->tasklet_ctx.list); + mcq->tasklet_ctx.comp(mcq); + mlx5_cq_put(mcq); + if (time_after(jiffies, end)) + break; + } + + if (!list_empty(&ctx->process_list)) + tasklet_schedule(&ctx->task); +} + +static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) +{ + unsigned long flags; + struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; + + spin_lock_irqsave(&tasklet_ctx->lock, flags); + /* When migrating CQs between EQs will be implemented, please note + * that you need to sync this point. It is possible that + * while migrating a CQ, completions on the old EQs could + * still arrive. + */ + if (list_empty_careful(&cq->tasklet_ctx.list)) { + mlx5_cq_hold(cq); + list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); + } + spin_unlock_irqrestore(&tasklet_ctx->lock, flags); +} + +int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen) +{ + int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn); + u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; + struct mlx5_eq *eq; + int err; + + eq = mlx5_eqn2eq(dev, eqn); + if (IS_ERR(eq)) + return PTR_ERR(eq); + + memset(out, 0, sizeof(out)); + MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (err) + return err; + + cq->cqn = MLX5_GET(create_cq_out, out, cqn); + cq->cons_index = 0; + cq->arm_sn = 0; + cq->eq = eq; + refcount_set(&cq->refcount, 1); + init_completion(&cq->free); + if (!cq->comp) + cq->comp = mlx5_add_cq_to_tasklet; + /* assuming CQ will be deleted before the EQ */ + cq->tasklet_ctx.priv = &eq->tasklet_ctx; + INIT_LIST_HEAD(&cq->tasklet_ctx.list); + + /* Add to comp EQ CQ tree to recv comp events */ + err = mlx5_eq_add_cq(eq, cq); + if (err) + goto err_cmd; + + /* Add to async EQ CQ tree to recv async events */ + err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq); + if (err) + goto err_cq_add; + + cq->pid = current->pid; + err = mlx5_debug_cq_add(dev, cq); + if (err) + mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n", + cq->cqn); + + cq->uar = dev->priv.uar; + + return 0; + +err_cq_add: + mlx5_eq_del_cq(eq, cq); +err_cmd: + memset(din, 0, sizeof(din)); + memset(dout, 0, sizeof(dout)); + MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, cqn, cq->cqn); + mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); + return err; +} +EXPORT_SYMBOL(mlx5_core_create_cq); + +int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; + int err; + + err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq); + if (err) + return err; + + err = mlx5_eq_del_cq(cq->eq, cq); + if (err) + return err; + + MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + synchronize_irq(cq->irqn); + + mlx5_debug_cq_remove(dev, cq); + mlx5_cq_put(cq); + wait_for_completion(&cq->free); + + return 0; +} +EXPORT_SYMBOL(mlx5_core_destroy_cq); + +int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {0}; + + MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ); + MLX5_SET(query_cq_in, in, cqn, cq->cqn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} +EXPORT_SYMBOL(mlx5_core_query_cq); + +int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0}; + + MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_modify_cq); + +int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, + struct mlx5_core_cq *cq, + u16 cq_period, + u16 cq_max_count) +{ + u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {0}; + void *cqc; + + MLX5_SET(modify_cq_in, in, cqn, cq->cqn); + cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, cq_period, cq_period); + MLX5_SET(cqc, cqc, cq_max_count, cq_max_count); + MLX5_SET(modify_cq_in, in, + modify_field_select_resize_field_select.modify_field_select.modify_field_select, + MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); + + return mlx5_core_modify_cq(dev, cq, in, sizeof(in)); +} +EXPORT_SYMBOL(mlx5_core_modify_cq_moderation); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c new file mode 100644 index 000000000..90fabd612 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -0,0 +1,593 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +enum { + QP_PID, + QP_STATE, + QP_XPORT, + QP_MTU, + QP_N_RECV, + QP_RECV_SZ, + QP_N_SEND, + QP_LOG_PG_SZ, + QP_RQPN, +}; + +static char *qp_fields[] = { + [QP_PID] = "pid", + [QP_STATE] = "state", + [QP_XPORT] = "transport", + [QP_MTU] = "mtu", + [QP_N_RECV] = "num_recv", + [QP_RECV_SZ] = "rcv_wqe_sz", + [QP_N_SEND] = "num_send", + [QP_LOG_PG_SZ] = "log2_page_sz", + [QP_RQPN] = "remote_qpn", +}; + +enum { + EQ_NUM_EQES, + EQ_INTR, + EQ_LOG_PG_SZ, +}; + +static char *eq_fields[] = { + [EQ_NUM_EQES] = "num_eqes", + [EQ_INTR] = "intr", + [EQ_LOG_PG_SZ] = "log_page_size", +}; + +enum { + CQ_PID, + CQ_NUM_CQES, + CQ_LOG_PG_SZ, +}; + +static char *cq_fields[] = { + [CQ_PID] = "pid", + [CQ_NUM_CQES] = "num_cqes", + [CQ_LOG_PG_SZ] = "log_page_size", +}; + +struct dentry *mlx5_debugfs_root; +EXPORT_SYMBOL(mlx5_debugfs_root); + +void mlx5_register_debugfs(void) +{ + mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL); + if (IS_ERR_OR_NULL(mlx5_debugfs_root)) + mlx5_debugfs_root = NULL; +} + +void mlx5_unregister_debugfs(void) +{ + debugfs_remove(mlx5_debugfs_root); +} + +int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return 0; + + atomic_set(&dev->num_qps, 0); + + dev->priv.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg_root); + if (!dev->priv.qp_debugfs) + return -ENOMEM; + + return 0; +} + +void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->priv.qp_debugfs); +} + +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return 0; + + dev->priv.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg_root); + if (!dev->priv.eq_debugfs) + return -ENOMEM; + + return 0; +} + +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->priv.eq_debugfs); +} + +static ssize_t average_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cmd_stats *stats; + u64 field = 0; + int ret; + char tbuf[22]; + + stats = filp->private_data; + spin_lock_irq(&stats->lock); + if (stats->n) + field = div64_u64(stats->sum, stats->n); + spin_unlock_irq(&stats->lock); + ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field); + return simple_read_from_buffer(buf, count, pos, tbuf, ret); +} + +static ssize_t average_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_cmd_stats *stats; + + stats = filp->private_data; + spin_lock_irq(&stats->lock); + stats->sum = 0; + stats->n = 0; + spin_unlock_irq(&stats->lock); + + *pos += count; + + return count; +} + +static const struct file_operations stats_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = average_read, + .write = average_write, +}; + +int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_stats *stats; + struct dentry **cmd; + const char *namep; + int err; + int i; + + if (!mlx5_debugfs_root) + return 0; + + cmd = &dev->priv.cmdif_debugfs; + *cmd = debugfs_create_dir("commands", dev->priv.dbg_root); + if (!*cmd) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) { + stats = &dev->cmd.stats[i]; + namep = mlx5_command_str(i); + if (strcmp(namep, "unknown command opcode")) { + stats->root = debugfs_create_dir(namep, *cmd); + if (!stats->root) { + mlx5_core_warn(dev, "failed adding command %d\n", + i); + err = -ENOMEM; + goto out; + } + + stats->avg = debugfs_create_file("average", 0400, + stats->root, stats, + &stats_fops); + if (!stats->avg) { + mlx5_core_warn(dev, "failed creating debugfs file\n"); + err = -ENOMEM; + goto out; + } + + stats->count = debugfs_create_u64("n", 0400, + stats->root, + &stats->n); + if (!stats->count) { + mlx5_core_warn(dev, "failed creating debugfs file\n"); + err = -ENOMEM; + goto out; + } + } + } + + return 0; +out: + debugfs_remove_recursive(dev->priv.cmdif_debugfs); + return err; +} + +void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->priv.cmdif_debugfs); +} + +int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return 0; + + dev->priv.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg_root); + if (!dev->priv.cq_debugfs) + return -ENOMEM; + + return 0; +} + +void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->priv.cq_debugfs); +} + +static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, + int index, int *is_str) +{ + int outlen = MLX5_ST_SZ_BYTES(query_qp_out); + struct mlx5_qp_context *ctx; + u64 param = 0; + u32 *out; + int err; + int no_sq; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return param; + + err = mlx5_core_qp_query(dev, qp, out, outlen); + if (err) { + mlx5_core_warn(dev, "failed to query qp err=%d\n", err); + goto out; + } + + *is_str = 0; + + /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */ + ctx = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, out, qpc); + + switch (index) { + case QP_PID: + param = qp->pid; + break; + case QP_STATE: + param = (unsigned long)mlx5_qp_state_str(be32_to_cpu(ctx->flags) >> 28); + *is_str = 1; + break; + case QP_XPORT: + param = (unsigned long)mlx5_qp_type_str((be32_to_cpu(ctx->flags) >> 16) & 0xff); + *is_str = 1; + break; + case QP_MTU: + switch (ctx->mtu_msgmax >> 5) { + case IB_MTU_256: + param = 256; + break; + case IB_MTU_512: + param = 512; + break; + case IB_MTU_1024: + param = 1024; + break; + case IB_MTU_2048: + param = 2048; + break; + case IB_MTU_4096: + param = 4096; + break; + default: + param = 0; + } + break; + case QP_N_RECV: + param = 1 << ((ctx->rq_size_stride >> 3) & 0xf); + break; + case QP_RECV_SZ: + param = 1 << ((ctx->rq_size_stride & 7) + 4); + break; + case QP_N_SEND: + no_sq = be16_to_cpu(ctx->sq_crq_size) >> 15; + if (!no_sq) + param = 1 << (be16_to_cpu(ctx->sq_crq_size) >> 11); + else + param = 0; + break; + case QP_LOG_PG_SZ: + param = (be32_to_cpu(ctx->log_pg_sz_remote_qpn) >> 24) & 0x1f; + param += 12; + break; + case QP_RQPN: + param = be32_to_cpu(ctx->log_pg_sz_remote_qpn) & 0xffffff; + break; + } + +out: + kfree(out); + return param; +} + +static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + int index) +{ + int outlen = MLX5_ST_SZ_BYTES(query_eq_out); + u64 param = 0; + void *ctx; + u32 *out; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return param; + + err = mlx5_core_eq_query(dev, eq, out, outlen); + if (err) { + mlx5_core_warn(dev, "failed to query eq\n"); + goto out; + } + ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry); + + switch (index) { + case EQ_NUM_EQES: + param = 1 << MLX5_GET(eqc, ctx, log_eq_size); + break; + case EQ_INTR: + param = MLX5_GET(eqc, ctx, intr); + break; + case EQ_LOG_PG_SZ: + param = MLX5_GET(eqc, ctx, log_page_size) + 12; + break; + } + +out: + kfree(out); + return param; +} + +static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + int index) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cq_out); + u64 param = 0; + void *ctx; + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return param; + + err = mlx5_core_query_cq(dev, cq, out, outlen); + if (err) { + mlx5_core_warn(dev, "failed to query cq\n"); + goto out; + } + ctx = MLX5_ADDR_OF(query_cq_out, out, cq_context); + + switch (index) { + case CQ_PID: + param = cq->pid; + break; + case CQ_NUM_CQES: + param = 1 << MLX5_GET(cqc, ctx, log_cq_size); + break; + case CQ_LOG_PG_SZ: + param = MLX5_GET(cqc, ctx, log_page_size); + break; + } + +out: + kvfree(out); + return param; +} + +static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_field_desc *desc; + struct mlx5_rsc_debug *d; + char tbuf[18]; + int is_str = 0; + u64 field; + int ret; + + desc = filp->private_data; + d = (void *)(desc - desc->i) - sizeof(*d); + switch (d->type) { + case MLX5_DBG_RSC_QP: + field = qp_read_field(d->dev, d->object, desc->i, &is_str); + break; + + case MLX5_DBG_RSC_EQ: + field = eq_read_field(d->dev, d->object, desc->i); + break; + + case MLX5_DBG_RSC_CQ: + field = cq_read_field(d->dev, d->object, desc->i); + break; + + default: + mlx5_core_warn(d->dev, "invalid resource type %d\n", d->type); + return -EINVAL; + } + + if (is_str) + ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)(unsigned long)field); + else + ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field); + + return simple_read_from_buffer(buf, count, pos, tbuf, ret); +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = dbg_read, +}; + +static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type, + struct dentry *root, struct mlx5_rsc_debug **dbg, + int rsn, char **field, int nfile, void *data) +{ + struct mlx5_rsc_debug *d; + char resn[32]; + int err; + int i; + + d = kzalloc(struct_size(d, fields, nfile), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->dev = dev; + d->object = data; + d->type = type; + sprintf(resn, "0x%x", rsn); + d->root = debugfs_create_dir(resn, root); + if (!d->root) { + err = -ENOMEM; + goto out_free; + } + + for (i = 0; i < nfile; i++) { + d->fields[i].i = i; + d->fields[i].dent = debugfs_create_file(field[i], 0400, + d->root, &d->fields[i], + &fops); + if (!d->fields[i].dent) { + err = -ENOMEM; + goto out_rem; + } + } + *dbg = d; + + return 0; +out_rem: + debugfs_remove_recursive(d->root); + +out_free: + kfree(d); + return err; +} + +static void rem_res_tree(struct mlx5_rsc_debug *d) +{ + debugfs_remove_recursive(d->root); + kfree(d); +} + +int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.qp_debugfs, + &qp->dbg, qp->qpn, qp_fields, + ARRAY_SIZE(qp_fields), qp); + if (err) + qp->dbg = NULL; + + return err; +} + +void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) +{ + if (!mlx5_debugfs_root) + return; + + if (qp->dbg) + rem_res_tree(qp->dbg); +} + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.eq_debugfs, + &eq->dbg, eq->eqn, eq_fields, + ARRAY_SIZE(eq_fields), eq); + if (err) + eq->dbg = NULL; + + return err; +} + +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + if (!mlx5_debugfs_root) + return; + + if (eq->dbg) + rem_res_tree(eq->dbg); +} + +int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.cq_debugfs, + &cq->dbg, cq->cqn, cq_fields, + ARRAY_SIZE(cq_fields), cq); + if (err) + cq->dbg = NULL; + + return err; +} + +void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + if (!mlx5_debugfs_root) + return; + + if (cq->dbg) + rem_res_tree(cq->dbg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c new file mode 100644 index 000000000..3692d6a1c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -0,0 +1,498 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +static LIST_HEAD(intf_list); +static LIST_HEAD(mlx5_dev_list); +/* intf dev list mutex */ +static DEFINE_MUTEX(mlx5_intf_mutex); + +struct mlx5_device_context { + struct list_head list; + struct mlx5_interface *intf; + void *context; + unsigned long state; +}; + +struct mlx5_delayed_event { + struct list_head list; + struct mlx5_core_dev *dev; + enum mlx5_dev_event event; + unsigned long param; +}; + +enum { + MLX5_INTERFACE_ADDED, + MLX5_INTERFACE_ATTACHED, +}; + +static void add_delayed_event(struct mlx5_priv *priv, + struct mlx5_core_dev *dev, + enum mlx5_dev_event event, + unsigned long param) +{ + struct mlx5_delayed_event *delayed_event; + + delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC); + if (!delayed_event) { + mlx5_core_err(dev, "event %d is missed\n", event); + return; + } + + mlx5_core_dbg(dev, "Accumulating event %d\n", event); + delayed_event->dev = dev; + delayed_event->event = event; + delayed_event->param = param; + list_add_tail(&delayed_event->list, &priv->waiting_events_list); +} + +static void delayed_event_release(struct mlx5_device_context *dev_ctx, + struct mlx5_priv *priv) +{ + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + struct mlx5_delayed_event *de; + struct mlx5_delayed_event *n; + struct list_head temp; + + INIT_LIST_HEAD(&temp); + + spin_lock_irq(&priv->ctx_lock); + + priv->is_accum_events = false; + list_splice_init(&priv->waiting_events_list, &temp); + if (!dev_ctx->context) + goto out; + list_for_each_entry_safe(de, n, &temp, list) + dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); + +out: + spin_unlock_irq(&priv->ctx_lock); + + list_for_each_entry_safe(de, n, &temp, list) { + list_del(&de->list); + kfree(de); + } +} + +/* accumulating events that can come after mlx5_ib calls to + * ib_register_device, till adding that interface to the events list. + */ +static void delayed_event_start(struct mlx5_priv *priv) +{ + spin_lock_irq(&priv->ctx_lock); + priv->is_accum_events = true; + spin_unlock_irq(&priv->ctx_lock); +} + +void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + if (!mlx5_lag_intf_add(intf, priv)) + return; + + dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL); + if (!dev_ctx) + return; + + dev_ctx->intf = intf; + + delayed_event_start(priv); + + dev_ctx->context = intf->add(dev); + if (dev_ctx->context) { + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + if (intf->attach) + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + + spin_lock_irq(&priv->ctx_lock); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (dev_ctx->intf->pfault) { + if (priv->pfault) { + mlx5_core_err(dev, "multiple page fault handlers not supported"); + } else { + priv->pfault_ctx = dev_ctx->context; + priv->pfault = dev_ctx->intf->pfault; + } + } +#endif + spin_unlock_irq(&priv->ctx_lock); + } + + delayed_event_release(dev_ctx, priv); + + if (!dev_ctx->context) + kfree(dev_ctx); +} + +static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf, + struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf == intf) + return dev_ctx; + return NULL; +} + +void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + spin_lock_irq(&priv->ctx_lock); + if (priv->pfault == dev_ctx->intf->pfault) + priv->pfault = NULL; + spin_unlock_irq(&priv->ctx_lock); + + synchronize_srcu(&priv->pfault_srcu); +#endif + + spin_lock_irq(&priv->ctx_lock); + list_del(&dev_ctx->list); + spin_unlock_irq(&priv->ctx_lock); + + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + intf->remove(dev, dev_ctx->context); + + kfree(dev_ctx); +} + +static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + delayed_event_start(priv); + if (intf->attach) { + if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) + goto out; + if (intf->attach(dev, dev_ctx->context)) + goto out; + + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + goto out; + dev_ctx->context = intf->add(dev); + if (!dev_ctx->context) + goto out; + + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } + +out: + delayed_event_release(dev_ctx, priv); +} + +void mlx5_attach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_attach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->detach) { + if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) + return; + intf->detach(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + intf->remove(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +void mlx5_detach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_detach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +bool mlx5_device_registered(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv; + bool found = false; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + if (priv == &dev->priv) + found = true; + mutex_unlock(&mlx5_intf_mutex); + + return found; +} + +int mlx5_register_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_add_tail(&priv->dev_list, &mlx5_dev_list); + list_for_each_entry(intf, &intf_list, list) + mlx5_add_device(intf, priv); + mutex_unlock(&mlx5_intf_mutex); + + return 0; +} + +void mlx5_unregister_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry_reverse(intf, &intf_list, list) + mlx5_remove_device(intf, priv); + list_del(&priv->dev_list); + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_register_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + if (!intf->add || !intf->remove) + return -EINVAL; + + mutex_lock(&mlx5_intf_mutex); + list_add_tail(&intf->list, &intf_list); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + mlx5_add_device(intf, priv); + mutex_unlock(&mlx5_intf_mutex); + + return 0; +} +EXPORT_SYMBOL(mlx5_register_interface); + +void mlx5_unregister_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + mlx5_remove_device(intf, priv); + list_del(&intf->list); + mutex_unlock(&mlx5_intf_mutex); +} +EXPORT_SYMBOL(mlx5_unregister_interface); + +/* Must be called with intf_mutex held */ +static bool mlx5_has_added_dev_by_protocol(struct mlx5_core_dev *mdev, int protocol) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_interface *intf; + bool found = false; + + list_for_each_entry(intf, &intf_list, list) { + if (intf->protocol == protocol) { + dev_ctx = mlx5_get_device(intf, &mdev->priv); + if (dev_ctx && test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + found = true; + break; + } + } + + return found; +} + +void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol) +{ + mutex_lock(&mlx5_intf_mutex); + if (mlx5_has_added_dev_by_protocol(mdev, protocol)) { + mlx5_remove_dev_by_protocol(mdev, protocol); + mlx5_add_dev_by_protocol(mdev, protocol); + } + mutex_unlock(&mlx5_intf_mutex); +} + +void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) +{ + struct mlx5_priv *priv = &mdev->priv; + struct mlx5_device_context *dev_ctx; + unsigned long flags; + void *result = NULL; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) + if ((dev_ctx->intf->protocol == protocol) && + dev_ctx->intf->get_dev) { + result = dev_ctx->intf->get_dev(dev_ctx->context); + break; + } + + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + return result; +} +EXPORT_SYMBOL(mlx5_get_protocol_dev); + +/* Must be called with intf_mutex held */ +void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) +{ + struct mlx5_interface *intf; + + list_for_each_entry(intf, &intf_list, list) + if (intf->protocol == protocol) { + mlx5_add_device(intf, &dev->priv); + break; + } +} + +/* Must be called with intf_mutex held */ +void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) +{ + struct mlx5_interface *intf; + + list_for_each_entry(intf, &intf_list, list) + if (intf->protocol == protocol) { + mlx5_remove_device(intf, &dev->priv); + break; + } +} + +static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev) +{ + return (u32)((pci_domain_nr(dev->pdev->bus) << 16) | + (dev->pdev->bus->number << 8) | + PCI_SLOT(dev->pdev->devfn)); +} + +/* Must be called with intf_mutex held */ +struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) +{ + u32 pci_id = mlx5_gen_pci_id(dev); + struct mlx5_core_dev *res = NULL; + struct mlx5_core_dev *tmp_dev; + struct mlx5_priv *priv; + + list_for_each_entry(priv, &mlx5_dev_list, dev_list) { + tmp_dev = container_of(priv, struct mlx5_core_dev, priv); + if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) { + res = tmp_dev; + break; + } + } + + return res; +} + +void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + unsigned long param) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_device_context *dev_ctx; + unsigned long flags; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + if (priv->is_accum_events) + add_delayed_event(priv, dev, event, param); + + /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is + * still in priv->ctx_list. In this case, only notify the dev_ctx if its + * ADDED or ATTACHED bit are set. + */ + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf->event && + (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) || + test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))) + dev_ctx->intf->event(dev, dev_ctx->context, event, param); + + spin_unlock_irqrestore(&priv->ctx_lock, flags); +} + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +void mlx5_core_page_fault(struct mlx5_core_dev *dev, + struct mlx5_pagefault *pfault) +{ + struct mlx5_priv *priv = &dev->priv; + int srcu_idx; + + srcu_idx = srcu_read_lock(&priv->pfault_srcu); + if (priv->pfault) + priv->pfault(dev, priv->pfault_ctx, pfault); + srcu_read_unlock(&priv->pfault_srcu, srcu_idx); +} +#endif + +void mlx5_dev_list_lock(void) +{ + mutex_lock(&mlx5_intf_mutex); +} + +void mlx5_dev_list_unlock(void) +{ + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_dev_list_trylock(void) +{ + return mutex_trylock(&mlx5_intf_mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile new file mode 100644 index 000000000..d8e17110f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c new file mode 100644 index 000000000..0f11fff32 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define CREATE_TRACE_POINTS + +#include "fs_tracepoint.h" +#include <linux/stringify.h> + +#define DECLARE_MASK_VAL(type, name) struct {type m; type v; } name +#define MASK_VAL(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET(spec, mask, fld),\ + .v = MLX5_GET(spec, val, fld)} +#define MASK_VAL_BE(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET_BE(type, spec, mask, fld),\ + .v = MLX5_GET_BE(type, spec, val, fld)} +#define GET_MASKED_VAL(name) (name.m & name.v) + +#define GET_MASK_VAL(name, type, mask, val, fld) \ + (name.m = MLX5_GET(type, mask, fld), \ + name.v = MLX5_GET(type, val, fld), \ + name.m & name.v) +#define PRINT_MASKED_VAL(name, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", name.v); \ + } +#define PRINT_MASKED_VALP(name, cast, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", \ + (cast)&name.v);\ + } + +static void print_lyr_2_4_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_L2(type, name, fld) \ + MASK_VAL(type, fte_match_set_lyr_2_4, name, mask, value, fld) + DECLARE_MASK_VAL(u64, smac) = { + .m = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0), + .v = MLX5_GET(fte_match_set_lyr_2_4, value, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, smac_15_0)}; + DECLARE_MASK_VAL(u64, dmac) = { + .m = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0), + .v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)}; + MASK_VAL_L2(u16, ethertype, ethertype); + + PRINT_MASKED_VALP(smac, u8 *, p, "%pM"); + PRINT_MASKED_VALP(dmac, u8 *, p, "%pM"); + PRINT_MASKED_VAL(ethertype, p, "%04x"); + + if (ethertype.m == 0xffff) { + if (ethertype.v == ETH_P_IP) { +#define MASK_VAL_L2_BE(type, name, fld) \ + MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld) + MASK_VAL_L2_BE(u32, src_ipv4, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MASK_VAL_L2_BE(u32, dst_ipv4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p, + "%pI4"); + PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p, + "%pI4"); + } else if (ethertype.v == ETH_P_IPV6) { + static const struct in6_addr full_ones = { + .in6_u.u6_addr32 = {__constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff)}, + }; + DECLARE_MASK_VAL(struct in6_addr, src_ipv6); + DECLARE_MASK_VAL(struct in6_addr, dst_ipv6); + + memcpy(src_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.m)); + memcpy(dst_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.m)); + memcpy(src_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.v)); + memcpy(dst_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.v)); + + if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "src_ipv6=%pI6 ", + &src_ipv6.v); + if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "dst_ipv6=%pI6 ", + &dst_ipv6.v); + } + } + +#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\ + MASK_VAL_L2(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + + PRINT_MASKED_VAL_L2(u8, ip_protocol, ip_protocol, p, "%02x"); + PRINT_MASKED_VAL_L2(u16, tcp_flags, tcp_flags, p, "%x"); + PRINT_MASKED_VAL_L2(u16, tcp_sport, tcp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, tcp_dport, tcp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_sport, udp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_dport, udp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, first_vid, first_vid, p, "%04x"); + PRINT_MASKED_VAL_L2(u8, first_prio, first_prio, p, "%x"); + PRINT_MASKED_VAL_L2(u8, first_cfi, first_cfi, p, "%d"); + PRINT_MASKED_VAL_L2(u8, ip_dscp, ip_dscp, p, "%02x"); + PRINT_MASKED_VAL_L2(u8, ip_ecn, ip_ecn, p, "%x"); + PRINT_MASKED_VAL_L2(u8, cvlan_tag, cvlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, svlan_tag, svlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, frag, frag, p, "%d"); +} + +static void print_misc_parameters_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_MISC(type, name, fld) \ + MASK_VAL(type, fte_match_set_misc, name, mask, value, fld) +#define PRINT_MASKED_VAL_MISC(type, name, fld, p, format) {\ + MASK_VAL_MISC(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + DECLARE_MASK_VAL(u64, gre_key) = { + .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 | + MLX5_GET(fte_match_set_misc, mask, gre_key_l), + .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 | + MLX5_GET(fte_match_set_misc, value, gre_key_l)}; + + PRINT_MASKED_VAL(gre_key, p, "%llu"); + PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, source_port, source_port, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_prio, outer_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_cfi, outer_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, outer_second_vid, outer_second_vid, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_prio, inner_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cfi, inner_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, inner_second_vid, inner_second_vid, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, outer_second_cvlan_tag, + outer_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cvlan_tag, + inner_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_svlan_tag, + outer_second_svlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_svlan_tag, + inner_second_svlan_tag, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, gre_protocol, gre_protocol, p, "%u"); + + PRINT_MASKED_VAL_MISC(u32, vxlan_vni, vxlan_vni, p, "%u"); + PRINT_MASKED_VAL_MISC(u32, outer_ipv6_flow_label, outer_ipv6_flow_label, + p, "%x"); + PRINT_MASKED_VAL_MISC(u32, inner_ipv6_flow_label, inner_ipv6_flow_label, + p, "%x"); +} + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner) +{ + const char *ret = trace_seq_buffer_ptr(p); + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { + trace_seq_printf(p, "[outer] "); + print_lyr_2_4_hdrs(p, mask_outer, value_outer); + } + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { + trace_seq_printf(p, "[misc] "); + print_misc_parameters_hdrs(p, mask_misc, value_misc); + } + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { + trace_seq_printf(p, "[inner] "); + print_lyr_2_4_hdrs(p, mask_inner, value_inner); + } + trace_seq_putc(p, 0); + return ret; +} + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id) +{ + const char *ret = trace_seq_buffer_ptr(p); + + switch (dst->type) { + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + trace_seq_printf(p, "vport=%u\n", dst->vport.num); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + trace_seq_printf(p, "ft=%p\n", dst->ft); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + trace_seq_printf(p, "ft_num=%u\n", dst->ft_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_TIR: + trace_seq_printf(p, "tir=%u\n", dst->tir_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_COUNTER: + trace_seq_printf(p, "counter_id=%u\n", counter_id); + break; + case MLX5_FLOW_DESTINATION_TYPE_PORT: + trace_seq_printf(p, "port\n"); + break; + } + + trace_seq_putc(p, 0); + return ret; +} + +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_rule); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_rule); + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h new file mode 100644 index 000000000..0240aee91 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(_MLX5_FS_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_FS_TP_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> +#include "../fs_core.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#define __parse_fs_hdrs(match_criteria_enable, mouter, mmisc, minner, vouter, \ + vinner, vmisc) \ + parse_fs_hdrs(p, match_criteria_enable, mouter, mmisc, minner, vouter,\ + vinner, vmisc) + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner); + +#define __parse_fs_dst(dst, counter_id) \ + parse_fs_dst(p, (const struct mlx5_flow_destination *)dst, counter_id) + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id); + +TRACE_EVENT(mlx5_fs_add_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(const struct mlx5_flow_table *, ft) + __field(u32, start_index) + __field(u32, end_index) + __field(u32, id) + __field(u8, mask_enable) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fg = fg; + fs_get_obj(__entry->ft, fg->node.parent); + __entry->start_index = fg->start_index; + __entry->end_index = fg->start_index + fg->max_ftes; + __entry->id = fg->id; + __entry->mask_enable = fg->mask.match_criteria_enable; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + + ), + TP_printk("fg=%p ft=%p id=%u start=%u end=%u bit_mask=%02x %s\n", + __entry->fg, __entry->ft, __entry->id, + __entry->start_index, __entry->end_index, + __entry->mask_enable, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(u32, id) + ), + TP_fast_assign( + __entry->fg = fg; + __entry->id = fg->id; + + ), + TP_printk("fg=%p id=%u\n", + __entry->fg, __entry->id) + ); + +#define ACTION_FLAGS \ + {MLX5_FLOW_CONTEXT_ACTION_ALLOW, "ALLOW"},\ + {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\ + {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\ + {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP, "VLAN_POP"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2, "VLAN_PUSH_2"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2, "VLAN_POP_2"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"} + +TRACE_EVENT(mlx5_fs_set_fte, + TP_PROTO(const struct fs_fte *fte, int new_fte), + TP_ARGS(fte, new_fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(const struct mlx5_flow_group *, fg) + __field(u32, group_index) + __field(u32, index) + __field(u32, action) + __field(u32, flow_tag) + __field(u8, mask_enable) + __field(int, new_fte) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + __array(u32, value_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->new_fte = new_fte; + fs_get_obj(__entry->fg, fte->node.parent); + __entry->group_index = __entry->fg->id; + __entry->index = fte->index; + __entry->action = fte->action.action; + __entry->mask_enable = __entry->fg->mask.match_criteria_enable; + __entry->flow_tag = fte->action.flow_tag; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + memcpy(__entry->value_outer, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + outer_headers), + sizeof(__entry->value_outer)); + memcpy(__entry->value_inner, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + inner_headers), + sizeof(__entry->value_inner)); + memcpy(__entry->value_misc, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + misc_parameters), + sizeof(__entry->value_misc)); + + ), + TP_printk("op=%s fte=%p fg=%p index=%u group_index=%u action=<%s> flow_tag=%x %s\n", + __entry->new_fte ? "add" : "set", + __entry->fte, __entry->fg, __entry->index, + __entry->group_index, __print_flags(__entry->action, "|", + ACTION_FLAGS), + __entry->flow_tag, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->value_outer, + __entry->value_misc, + __entry->value_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fte, + TP_PROTO(const struct fs_fte *fte), + TP_ARGS(fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(u32, index) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->index = fte->index; + + ), + TP_printk("fte=%p index=%u\n", + __entry->fte, __entry->index) + ); + +TRACE_EVENT(mlx5_fs_add_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + __field(u32, sw_action) + __field(u32, index) + __field(u32, counter_id) + __array(u8, destination, sizeof(struct mlx5_flow_destination)) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + __entry->index = __entry->fte->dests_size - 1; + __entry->sw_action = rule->sw_action; + memcpy(__entry->destination, + &rule->dest_attr, + sizeof(__entry->destination)); + if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER && + rule->dest_attr.counter) + __entry->counter_id = + rule->dest_attr.counter->id; + ), + TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", + __entry->rule, __entry->fte, __entry->index, + __print_flags(__entry->sw_action, "|", ACTION_FLAGS), + __parse_fs_dst(__entry->destination, __entry->counter_id)) + ); + +TRACE_EVENT(mlx5_fs_del_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + ), + TP_printk("rule=%p fte=%p\n", + __entry->rule, __entry->fte) + ); +#endif + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE fs_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c new file mode 100644 index 000000000..a22e932a0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -0,0 +1,950 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#define CREATE_TRACE_POINTS +#include "fw_tracer.h" +#include "fw_tracer_tracepoint.h" + +static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer) +{ + u32 *string_db_base_address_out = tracer->str_db.base_address_out; + u32 *string_db_size_out = tracer->str_db.size_out; + struct mlx5_core_dev *dev = tracer->dev; + u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + void *mtrc_cap_sp; + int err, i; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTRC_CAP, 0, 0); + if (err) { + mlx5_core_warn(dev, "FWTracer: Error reading tracer caps %d\n", + err); + return err; + } + + if (!MLX5_GET(mtrc_cap, out, trace_to_memory)) { + mlx5_core_dbg(dev, "FWTracer: Device does not support logging traces to memory\n"); + return -ENOTSUPP; + } + + tracer->trc_ver = MLX5_GET(mtrc_cap, out, trc_ver); + tracer->str_db.first_string_trace = + MLX5_GET(mtrc_cap, out, first_string_trace); + tracer->str_db.num_string_trace = + MLX5_GET(mtrc_cap, out, num_string_trace); + tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db); + tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner); + + for (i = 0; i < tracer->str_db.num_string_db; i++) { + mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]); + string_db_base_address_out[i] = MLX5_GET(mtrc_string_db_param, + mtrc_cap_sp, + string_db_base_address); + string_db_size_out[i] = MLX5_GET(mtrc_string_db_param, + mtrc_cap_sp, string_db_size); + } + + return err; +} + +static int mlx5_set_mtrc_caps_trace_owner(struct mlx5_fw_tracer *tracer, + u32 *out, u32 out_size, + u8 trace_owner) +{ + struct mlx5_core_dev *dev = tracer->dev; + u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + + MLX5_SET(mtrc_cap, in, trace_owner, trace_owner); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, out_size, + MLX5_REG_MTRC_CAP, 0, 1); +} + +static int mlx5_fw_tracer_ownership_acquire(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + int err; + + err = mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out), + MLX5_FW_TRACER_ACQUIRE_OWNERSHIP); + if (err) { + mlx5_core_warn(dev, "FWTracer: Acquire tracer ownership failed %d\n", + err); + return err; + } + + tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner); + + if (!tracer->owner) + return -EBUSY; + + return 0; +} + +static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer) +{ + u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + + mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out), + MLX5_FW_TRACER_RELEASE_OWNERSHIP); + tracer->owner = false; +} + +static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + struct device *ddev = &dev->pdev->dev; + dma_addr_t dma; + void *buff; + gfp_t gfp; + int err; + + tracer->buff.size = TRACE_BUFFER_SIZE_BYTE; + + gfp = GFP_KERNEL | __GFP_ZERO; + buff = (void *)__get_free_pages(gfp, + get_order(tracer->buff.size)); + if (!buff) { + err = -ENOMEM; + mlx5_core_warn(dev, "FWTracer: Failed to allocate pages, %d\n", err); + return err; + } + tracer->buff.log_buf = buff; + + dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE); + if (dma_mapping_error(ddev, dma)) { + mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n", + dma_mapping_error(ddev, dma)); + err = -ENOMEM; + goto free_pages; + } + tracer->buff.dma = dma; + + return 0; + +free_pages: + free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size)); + + return err; +} + +static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + struct device *ddev = &dev->pdev->dev; + + if (!tracer->buff.log_buf) + return; + + dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE); + free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size)); +} + +static int mlx5_fw_tracer_create_mkey(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + int err, inlen, i; + __be64 *mtt; + void *mkc; + u32 *in; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + + sizeof(*mtt) * round_up(TRACER_BUFFER_PAGE_NUM, 2); + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2)); + mtt = (u64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0 ; i < TRACER_BUFFER_PAGE_NUM ; i++) + mtt[i] = cpu_to_be64(tracer->buff.dma + i * PAGE_SIZE); + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, pd, tracer->buff.pdn); + MLX5_SET(mkc, mkc, bsf_octword_size, 0); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); + MLX5_SET(mkc, mkc, translations_octword_size, + DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2)); + MLX5_SET64(mkc, mkc, start_addr, tracer->buff.dma); + MLX5_SET64(mkc, mkc, len, tracer->buff.size); + err = mlx5_core_create_mkey(dev, &tracer->buff.mkey, in, inlen); + if (err) + mlx5_core_warn(dev, "FWTracer: Failed to create mkey, %d\n", err); + + kvfree(in); + + return err; +} + +static void mlx5_fw_tracer_free_strings_db(struct mlx5_fw_tracer *tracer) +{ + u32 num_string_db = tracer->str_db.num_string_db; + int i; + + for (i = 0; i < num_string_db; i++) { + kfree(tracer->str_db.buffer[i]); + tracer->str_db.buffer[i] = NULL; + } +} + +static int mlx5_fw_tracer_allocate_strings_db(struct mlx5_fw_tracer *tracer) +{ + u32 *string_db_size_out = tracer->str_db.size_out; + u32 num_string_db = tracer->str_db.num_string_db; + int i; + + for (i = 0; i < num_string_db; i++) { + tracer->str_db.buffer[i] = kzalloc(string_db_size_out[i], GFP_KERNEL); + if (!tracer->str_db.buffer[i]) + goto free_strings_db; + } + + return 0; + +free_strings_db: + mlx5_fw_tracer_free_strings_db(tracer); + return -ENOMEM; +} + +static void mlx5_tracer_read_strings_db(struct work_struct *work) +{ + struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer, + read_fw_strings_work); + u32 num_of_reads, num_string_db = tracer->str_db.num_string_db; + struct mlx5_core_dev *dev = tracer->dev; + u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + u32 leftovers, offset; + int err = 0, i, j; + u32 *out, outlen; + void *out_value; + + outlen = MLX5_ST_SZ_BYTES(mtrc_stdb) + STRINGS_DB_READ_SIZE_BYTES; + out = kzalloc(outlen, GFP_KERNEL); + if (!out) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < num_string_db; i++) { + offset = 0; + MLX5_SET(mtrc_stdb, in, string_db_index, i); + num_of_reads = tracer->str_db.size_out[i] / + STRINGS_DB_READ_SIZE_BYTES; + leftovers = (tracer->str_db.size_out[i] % + STRINGS_DB_READ_SIZE_BYTES) / + STRINGS_DB_LEFTOVER_SIZE_BYTES; + + MLX5_SET(mtrc_stdb, in, read_size, STRINGS_DB_READ_SIZE_BYTES); + for (j = 0; j < num_of_reads; j++) { + MLX5_SET(mtrc_stdb, in, start_offset, offset); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + outlen, MLX5_REG_MTRC_STDB, + 0, 1); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n", + err); + goto out_free; + } + + out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data); + memcpy(tracer->str_db.buffer[i] + offset, out_value, + STRINGS_DB_READ_SIZE_BYTES); + offset += STRINGS_DB_READ_SIZE_BYTES; + } + + /* Strings database is aligned to 64, need to read leftovers*/ + MLX5_SET(mtrc_stdb, in, read_size, + STRINGS_DB_LEFTOVER_SIZE_BYTES); + for (j = 0; j < leftovers; j++) { + MLX5_SET(mtrc_stdb, in, start_offset, offset); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + outlen, MLX5_REG_MTRC_STDB, + 0, 1); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n", + err); + goto out_free; + } + + out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data); + memcpy(tracer->str_db.buffer[i] + offset, out_value, + STRINGS_DB_LEFTOVER_SIZE_BYTES); + offset += STRINGS_DB_LEFTOVER_SIZE_BYTES; + } + } + + tracer->str_db.loaded = true; + +out_free: + kfree(out); +out: + return; +} + +static void mlx5_fw_tracer_arm(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0}; + int err; + + MLX5_SET(mtrc_ctrl, in, arm_event, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTRC_CTRL, 0, 1); + if (err) + mlx5_core_warn(dev, "FWTracer: Failed to arm tracer event %d\n", err); +} + +static const char *VAL_PARM = "%llx"; +static const char *REPLACE_64_VAL_PARM = "%x%x"; +static const char *PARAM_CHAR = "%"; + +static int mlx5_tracer_message_hash(u32 message_id) +{ + return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1); +} + +static struct tracer_string_format *mlx5_tracer_message_insert(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct hlist_head *head = + &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)]; + struct tracer_string_format *cur_string; + + cur_string = kzalloc(sizeof(*cur_string), GFP_KERNEL); + if (!cur_string) + return NULL; + + hlist_add_head(&cur_string->hlist, head); + + return cur_string; +} + +static struct tracer_string_format *mlx5_tracer_get_string(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct tracer_string_format *cur_string; + u32 str_ptr, offset; + int i; + + str_ptr = tracer_event->string_event.string_param; + + for (i = 0; i < tracer->str_db.num_string_db; i++) { + if (str_ptr > tracer->str_db.base_address_out[i] && + str_ptr < tracer->str_db.base_address_out[i] + + tracer->str_db.size_out[i]) { + offset = str_ptr - tracer->str_db.base_address_out[i]; + /* add it to the hash */ + cur_string = mlx5_tracer_message_insert(tracer, tracer_event); + if (!cur_string) + return NULL; + cur_string->string = (char *)(tracer->str_db.buffer[i] + + offset); + return cur_string; + } + } + + return NULL; +} + +static void mlx5_tracer_clean_message(struct tracer_string_format *str_frmt) +{ + hlist_del(&str_frmt->hlist); + kfree(str_frmt); +} + +static int mlx5_tracer_get_num_of_params(char *str) +{ + char *substr, *pstr = str; + int num_of_params = 0; + + /* replace %llx with %x%x */ + substr = strstr(pstr, VAL_PARM); + while (substr) { + memcpy(substr, REPLACE_64_VAL_PARM, 4); + pstr = substr; + substr = strstr(pstr, VAL_PARM); + } + + /* count all the % characters */ + substr = strstr(str, PARAM_CHAR); + while (substr) { + num_of_params += 1; + str = substr + 1; + substr = strstr(str, PARAM_CHAR); + } + + return num_of_params; +} + +static struct tracer_string_format *mlx5_tracer_message_find(struct hlist_head *head, + u8 event_id, u32 tmsn) +{ + struct tracer_string_format *message; + + hlist_for_each_entry(message, head, hlist) + if (message->event_id == event_id && message->tmsn == tmsn) + return message; + + return NULL; +} + +static struct tracer_string_format *mlx5_tracer_message_get(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct hlist_head *head = + &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)]; + + return mlx5_tracer_message_find(head, tracer_event->event_id, tracer_event->string_event.tmsn); +} + +static void poll_trace(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event, u64 *trace) +{ + u32 timestamp_low, timestamp_mid, timestamp_high, urts; + + tracer_event->event_id = MLX5_GET(tracer_event, trace, event_id); + tracer_event->lost_event = MLX5_GET(tracer_event, trace, lost); + + switch (tracer_event->event_id) { + case TRACER_EVENT_TYPE_TIMESTAMP: + tracer_event->type = TRACER_EVENT_TYPE_TIMESTAMP; + urts = MLX5_GET(tracer_timestamp_event, trace, urts); + if (tracer->trc_ver == 0) + tracer_event->timestamp_event.unreliable = !!(urts >> 2); + else + tracer_event->timestamp_event.unreliable = !!(urts & 1); + + timestamp_low = MLX5_GET(tracer_timestamp_event, + trace, timestamp7_0); + timestamp_mid = MLX5_GET(tracer_timestamp_event, + trace, timestamp39_8); + timestamp_high = MLX5_GET(tracer_timestamp_event, + trace, timestamp52_40); + + tracer_event->timestamp_event.timestamp = + ((u64)timestamp_high << 40) | + ((u64)timestamp_mid << 8) | + (u64)timestamp_low; + break; + default: + if (tracer_event->event_id >= tracer->str_db.first_string_trace || + tracer_event->event_id <= tracer->str_db.first_string_trace + + tracer->str_db.num_string_trace) { + tracer_event->type = TRACER_EVENT_TYPE_STRING; + tracer_event->string_event.timestamp = + MLX5_GET(tracer_string_event, trace, timestamp); + tracer_event->string_event.string_param = + MLX5_GET(tracer_string_event, trace, string_param); + tracer_event->string_event.tmsn = + MLX5_GET(tracer_string_event, trace, tmsn); + tracer_event->string_event.tdsn = + MLX5_GET(tracer_string_event, trace, tdsn); + } else { + tracer_event->type = TRACER_EVENT_TYPE_UNRECOGNIZED; + } + break; + } +} + +static u64 get_block_timestamp(struct mlx5_fw_tracer *tracer, u64 *ts_event) +{ + struct tracer_event tracer_event; + u8 event_id; + + event_id = MLX5_GET(tracer_event, ts_event, event_id); + + if (event_id == TRACER_EVENT_TYPE_TIMESTAMP) + poll_trace(tracer, &tracer_event, ts_event); + else + tracer_event.timestamp_event.timestamp = 0; + + return tracer_event.timestamp_event.timestamp; +} + +static void mlx5_fw_tracer_clean_print_hash(struct mlx5_fw_tracer *tracer) +{ + struct tracer_string_format *str_frmt; + struct hlist_node *n; + int i; + + for (i = 0; i < MESSAGE_HASH_SIZE; i++) { + hlist_for_each_entry_safe(str_frmt, n, &tracer->hash[i], hlist) + mlx5_tracer_clean_message(str_frmt); + } +} + +static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer) +{ + struct tracer_string_format *str_frmt, *tmp_str; + + list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list, + list) + list_del(&str_frmt->list); +} + +static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, + struct mlx5_core_dev *dev, + u64 trace_timestamp) +{ + char tmp[512]; + + snprintf(tmp, sizeof(tmp), str_frmt->string, + str_frmt->params[0], + str_frmt->params[1], + str_frmt->params[2], + str_frmt->params[3], + str_frmt->params[4], + str_frmt->params[5], + str_frmt->params[6]); + + trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost, + str_frmt->event_id, tmp); + + /* remove it from hash */ + mlx5_tracer_clean_message(str_frmt); +} + +static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct tracer_string_format *cur_string; + + if (tracer_event->string_event.tdsn == 0) { + cur_string = mlx5_tracer_get_string(tracer, tracer_event); + if (!cur_string) + return -1; + + cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string); + cur_string->last_param_num = 0; + cur_string->event_id = tracer_event->event_id; + cur_string->tmsn = tracer_event->string_event.tmsn; + cur_string->timestamp = tracer_event->string_event.timestamp; + cur_string->lost = tracer_event->lost_event; + if (cur_string->num_of_params == 0) /* trace with no params */ + list_add_tail(&cur_string->list, &tracer->ready_strings_list); + } else { + cur_string = mlx5_tracer_message_get(tracer, tracer_event); + if (!cur_string) { + pr_debug("%s Got string event for unknown string tdsm: %d\n", + __func__, tracer_event->string_event.tmsn); + return -1; + } + cur_string->last_param_num += 1; + if (cur_string->last_param_num > TRACER_MAX_PARAMS) { + pr_debug("%s Number of params exceeds the max (%d)\n", + __func__, TRACER_MAX_PARAMS); + list_add_tail(&cur_string->list, &tracer->ready_strings_list); + return 0; + } + /* keep the new parameter */ + cur_string->params[cur_string->last_param_num - 1] = + tracer_event->string_event.string_param; + if (cur_string->last_param_num == cur_string->num_of_params) + list_add_tail(&cur_string->list, &tracer->ready_strings_list); + } + + return 0; +} + +static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct tracer_timestamp_event timestamp_event = + tracer_event->timestamp_event; + struct tracer_string_format *str_frmt, *tmp_str; + struct mlx5_core_dev *dev = tracer->dev; + u64 trace_timestamp; + + list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list, list) { + list_del(&str_frmt->list); + if (str_frmt->timestamp < (timestamp_event.timestamp & MASK_6_0)) + trace_timestamp = (timestamp_event.timestamp & MASK_52_7) | + (str_frmt->timestamp & MASK_6_0); + else + trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) | + (str_frmt->timestamp & MASK_6_0); + + mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp); + } +} + +static int mlx5_tracer_handle_trace(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + if (tracer_event->type == TRACER_EVENT_TYPE_STRING) { + mlx5_tracer_handle_string_trace(tracer, tracer_event); + } else if (tracer_event->type == TRACER_EVENT_TYPE_TIMESTAMP) { + if (!tracer_event->timestamp_event.unreliable) + mlx5_tracer_handle_timestamp_trace(tracer, tracer_event); + } else { + pr_debug("%s Got unrecognised type %d for parsing, exiting..\n", + __func__, tracer_event->type); + } + return 0; +} + +static void mlx5_fw_tracer_handle_traces(struct work_struct *work) +{ + struct mlx5_fw_tracer *tracer = + container_of(work, struct mlx5_fw_tracer, handle_traces_work); + u64 block_timestamp, last_block_timestamp, tmp_trace_block[TRACES_PER_BLOCK]; + u32 block_count, start_offset, prev_start_offset, prev_consumer_index; + u32 trace_event_size = MLX5_ST_SZ_BYTES(tracer_event); + struct mlx5_core_dev *dev = tracer->dev; + struct tracer_event tracer_event; + int i; + + mlx5_core_dbg(dev, "FWTracer: Handle Trace event, owner=(%d)\n", tracer->owner); + if (!tracer->owner) + return; + + if (unlikely(!tracer->str_db.loaded)) + goto arm; + + block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE; + start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE; + + /* Copy the block to local buffer to avoid HW override while being processed*/ + memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset, + TRACER_BLOCK_SIZE_BYTE); + + block_timestamp = + get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]); + + while (block_timestamp > tracer->last_timestamp) { + /* Check block override if its not the first block */ + if (!tracer->last_timestamp) { + u64 *ts_event; + /* To avoid block override be the HW in case of buffer + * wraparound, the time stamp of the previous block + * should be compared to the last timestamp handled + * by the driver. + */ + prev_consumer_index = + (tracer->buff.consumer_index - 1) & (block_count - 1); + prev_start_offset = prev_consumer_index * TRACER_BLOCK_SIZE_BYTE; + + ts_event = tracer->buff.log_buf + prev_start_offset + + (TRACES_PER_BLOCK - 1) * trace_event_size; + last_block_timestamp = get_block_timestamp(tracer, ts_event); + /* If previous timestamp different from last stored + * timestamp then there is a good chance that the + * current buffer is overwritten and therefore should + * not be parsed. + */ + if (tracer->last_timestamp != last_block_timestamp) { + mlx5_core_warn(dev, "FWTracer: Events were lost\n"); + tracer->last_timestamp = block_timestamp; + tracer->buff.consumer_index = + (tracer->buff.consumer_index + 1) & (block_count - 1); + break; + } + } + + /* Parse events */ + for (i = 0; i < TRACES_PER_BLOCK ; i++) { + poll_trace(tracer, &tracer_event, &tmp_trace_block[i]); + mlx5_tracer_handle_trace(tracer, &tracer_event); + } + + tracer->buff.consumer_index = + (tracer->buff.consumer_index + 1) & (block_count - 1); + + tracer->last_timestamp = block_timestamp; + start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE; + memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset, + TRACER_BLOCK_SIZE_BYTE); + block_timestamp = get_block_timestamp(tracer, + &tmp_trace_block[TRACES_PER_BLOCK - 1]); + } + +arm: + mlx5_fw_tracer_arm(dev); +} + +static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + u32 out[MLX5_ST_SZ_DW(mtrc_conf)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtrc_conf)] = {0}; + int err; + + MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY); + MLX5_SET(mtrc_conf, in, log_trace_buffer_size, + ilog2(TRACER_BUFFER_PAGE_NUM)); + MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey.key); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTRC_CONF, 0, 1); + if (err) + mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err); + + return err; +} + +static int mlx5_fw_tracer_set_mtrc_ctrl(struct mlx5_fw_tracer *tracer, u8 status, u8 arm) +{ + struct mlx5_core_dev *dev = tracer->dev; + u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0}; + int err; + + MLX5_SET(mtrc_ctrl, in, modify_field_select, TRACE_STATUS); + MLX5_SET(mtrc_ctrl, in, trace_status, status); + MLX5_SET(mtrc_ctrl, in, arm_event, arm); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTRC_CTRL, 0, 1); + + if (!err && status) + tracer->last_timestamp = 0; + + return err; +} + +static int mlx5_fw_tracer_start(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + int err; + + err = mlx5_fw_tracer_ownership_acquire(tracer); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err); + /* Don't fail since ownership can be acquired on a later FW event */ + return 0; + } + + err = mlx5_fw_tracer_set_mtrc_conf(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to set tracer configuration %d\n", err); + goto release_ownership; + } + + /* enable tracer & trace events */ + err = mlx5_fw_tracer_set_mtrc_ctrl(tracer, 1, 1); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to enable tracer %d\n", err); + goto release_ownership; + } + + mlx5_core_dbg(dev, "FWTracer: Ownership granted and active\n"); + return 0; + +release_ownership: + mlx5_fw_tracer_ownership_release(tracer); + return err; +} + +static void mlx5_fw_tracer_ownership_change(struct work_struct *work) +{ + struct mlx5_fw_tracer *tracer = + container_of(work, struct mlx5_fw_tracer, ownership_change_work); + + mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner); + if (tracer->owner) { + tracer->owner = false; + tracer->buff.consumer_index = 0; + return; + } + + mlx5_fw_tracer_start(tracer); +} + +/* Create software resources (Buffers, etc ..) */ +struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_tracer *tracer = NULL; + int err; + + if (!MLX5_CAP_MCAM_REG(dev, tracer_registers)) { + mlx5_core_dbg(dev, "FWTracer: Tracer capability not present\n"); + return NULL; + } + + tracer = kvzalloc(sizeof(*tracer), GFP_KERNEL); + if (!tracer) + return ERR_PTR(-ENOMEM); + + tracer->work_queue = create_singlethread_workqueue("mlx5_fw_tracer"); + if (!tracer->work_queue) { + err = -ENOMEM; + goto free_tracer; + } + + tracer->dev = dev; + + INIT_LIST_HEAD(&tracer->ready_strings_list); + INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change); + INIT_WORK(&tracer->read_fw_strings_work, mlx5_tracer_read_strings_db); + INIT_WORK(&tracer->handle_traces_work, mlx5_fw_tracer_handle_traces); + + + err = mlx5_query_mtrc_caps(tracer); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err); + goto destroy_workqueue; + } + + err = mlx5_fw_tracer_create_log_buf(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Create log buffer failed %d\n", err); + goto destroy_workqueue; + } + + err = mlx5_fw_tracer_allocate_strings_db(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Allocate strings database failed %d\n", err); + goto free_log_buf; + } + + mlx5_core_dbg(dev, "FWTracer: Tracer created\n"); + + return tracer; + +free_log_buf: + mlx5_fw_tracer_destroy_log_buf(tracer); +destroy_workqueue: + tracer->dev = NULL; + destroy_workqueue(tracer->work_queue); +free_tracer: + kvfree(tracer); + return ERR_PTR(err); +} + +/* Create HW resources + start tracer + * must be called before Async EQ is created + */ +int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev; + int err; + + if (IS_ERR_OR_NULL(tracer)) + return 0; + + dev = tracer->dev; + + if (!tracer->str_db.loaded) + queue_work(tracer->work_queue, &tracer->read_fw_strings_work); + + err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err); + return err; + } + + err = mlx5_fw_tracer_create_mkey(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to create mkey %d\n", err); + goto err_dealloc_pd; + } + + mlx5_fw_tracer_start(tracer); + + return 0; + +err_dealloc_pd: + mlx5_core_dealloc_pd(dev, tracer->buff.pdn); + return err; +} + +/* Stop tracer + Cleanup HW resources + * must be called after Async EQ is destroyed + */ +void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) +{ + if (IS_ERR_OR_NULL(tracer)) + return; + + mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n", + tracer->owner); + + cancel_work_sync(&tracer->ownership_change_work); + cancel_work_sync(&tracer->handle_traces_work); + + if (tracer->owner) + mlx5_fw_tracer_ownership_release(tracer); + + mlx5_core_destroy_mkey(tracer->dev, &tracer->buff.mkey); + mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn); +} + +/* Free software resources (Buffers, etc ..) */ +void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) +{ + if (IS_ERR_OR_NULL(tracer)) + return; + + mlx5_core_dbg(tracer->dev, "FWTracer: Destroy\n"); + + cancel_work_sync(&tracer->read_fw_strings_work); + mlx5_fw_tracer_clean_ready_list(tracer); + mlx5_fw_tracer_clean_print_hash(tracer); + mlx5_fw_tracer_free_strings_db(tracer); + mlx5_fw_tracer_destroy_log_buf(tracer); + flush_workqueue(tracer->work_queue); + destroy_workqueue(tracer->work_queue); + kvfree(tracer); +} + +void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) +{ + struct mlx5_fw_tracer *tracer = dev->tracer; + + if (!tracer) + return; + + switch (eqe->sub_type) { + case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE: + if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) + queue_work(tracer->work_queue, &tracer->ownership_change_work); + break; + case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE: + queue_work(tracer->work_queue, &tracer->handle_traces_work); + break; + default: + mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n", + eqe->sub_type); + } +} + +EXPORT_TRACEPOINT_SYMBOL(mlx5_fw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h new file mode 100644 index 000000000..0347f2dd5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_TRACER_H__ +#define __LIB_TRACER_H__ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +#define STRINGS_DB_SECTIONS_NUM 8 +#define STRINGS_DB_READ_SIZE_BYTES 256 +#define STRINGS_DB_LEFTOVER_SIZE_BYTES 64 +#define TRACER_BUFFER_PAGE_NUM 64 +#define TRACER_BUFFER_CHUNK 4096 +#define TRACE_BUFFER_SIZE_BYTE (TRACER_BUFFER_PAGE_NUM * TRACER_BUFFER_CHUNK) + +#define TRACER_BLOCK_SIZE_BYTE 256 +#define TRACES_PER_BLOCK 32 + +#define TRACER_MAX_PARAMS 7 +#define MESSAGE_HASH_BITS 6 +#define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS) + +#define MASK_52_7 (0x1FFFFFFFFFFF80) +#define MASK_6_0 (0x7F) + +struct mlx5_fw_tracer { + struct mlx5_core_dev *dev; + bool owner; + u8 trc_ver; + struct workqueue_struct *work_queue; + struct work_struct ownership_change_work; + struct work_struct read_fw_strings_work; + + /* Strings DB */ + struct { + u8 first_string_trace; + u8 num_string_trace; + u32 num_string_db; + u32 base_address_out[STRINGS_DB_SECTIONS_NUM]; + u32 size_out[STRINGS_DB_SECTIONS_NUM]; + void *buffer[STRINGS_DB_SECTIONS_NUM]; + bool loaded; + } str_db; + + /* Log Buffer */ + struct { + u32 pdn; + void *log_buf; + dma_addr_t dma; + u32 size; + struct mlx5_core_mkey mkey; + u32 consumer_index; + } buff; + + u64 last_timestamp; + struct work_struct handle_traces_work; + struct hlist_head hash[MESSAGE_HASH_SIZE]; + struct list_head ready_strings_list; +}; + +struct tracer_string_format { + char *string; + int params[TRACER_MAX_PARAMS]; + int num_of_params; + int last_param_num; + u8 event_id; + u32 tmsn; + struct hlist_node hlist; + struct list_head list; + u32 timestamp; + bool lost; +}; + +enum mlx5_fw_tracer_ownership_state { + MLX5_FW_TRACER_RELEASE_OWNERSHIP, + MLX5_FW_TRACER_ACQUIRE_OWNERSHIP, +}; + +enum tracer_ctrl_fields_select { + TRACE_STATUS = 1 << 0, +}; + +enum tracer_event_type { + TRACER_EVENT_TYPE_STRING, + TRACER_EVENT_TYPE_TIMESTAMP = 0xFF, + TRACER_EVENT_TYPE_UNRECOGNIZED, +}; + +enum tracing_mode { + TRACE_TO_MEMORY = 1 << 0, +}; + +struct tracer_timestamp_event { + u64 timestamp; + u8 unreliable; +}; + +struct tracer_string_event { + u32 timestamp; + u32 tmsn; + u32 tdsn; + u32 string_param; +}; + +struct tracer_event { + bool lost_event; + u32 type; + u8 event_id; + union { + struct tracer_string_event string_event; + struct tracer_timestamp_event timestamp_event; + }; +}; + +struct mlx5_ifc_tracer_event_bits { + u8 lost[0x1]; + u8 timestamp[0x7]; + u8 event_id[0x8]; + u8 event_data[0x30]; +}; + +struct mlx5_ifc_tracer_string_event_bits { + u8 lost[0x1]; + u8 timestamp[0x7]; + u8 event_id[0x8]; + u8 tmsn[0xd]; + u8 tdsn[0x3]; + u8 string_param[0x20]; +}; + +struct mlx5_ifc_tracer_timestamp_event_bits { + u8 timestamp7_0[0x8]; + u8 event_id[0x8]; + u8 urts[0x3]; + u8 timestamp52_40[0xd]; + u8 timestamp39_8[0x20]; +}; + +struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev); +int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer); +void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer); +void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); +void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h new file mode 100644 index 000000000..83f90e9af --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(__LIB_TRACER_TRACEPOINT_H__) || defined(TRACE_HEADER_MULTI_READ) +#define __LIB_TRACER_TRACEPOINT_H__ + +#include <linux/tracepoint.h> +#include "fw_tracer.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +/* Tracepoint for FWTracer messages: */ +TRACE_EVENT(mlx5_fw, + TP_PROTO(const struct mlx5_fw_tracer *tracer, u64 trace_timestamp, + bool lost, u8 event_id, const char *msg), + + TP_ARGS(tracer, trace_timestamp, lost, event_id, msg), + + TP_STRUCT__entry( + __string(dev_name, dev_name(&tracer->dev->pdev->dev)) + __field(u64, trace_timestamp) + __field(bool, lost) + __field(u8, event_id) + __string(msg, msg) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name(&tracer->dev->pdev->dev)); + __entry->trace_timestamp = trace_timestamp; + __entry->lost = lost; + __entry->event_id = event_id; + __assign_str(msg, msg); + ), + + TP_printk("%s [0x%llx] %d [0x%x] %s", + __get_str(dev_name), + __entry->trace_timestamp, + __entry->lost, __entry->event_id, + __get_str(msg)) +); + +#endif + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ./diag +#define TRACE_INCLUDE_FILE fw_tracer_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h new file mode 100644 index 000000000..d79e177f8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -0,0 +1,979 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_H__ +#define __MLX5_EN_H__ + +#include <linux/if_vlan.h> +#include <linux/etherdevice.h> +#include <linux/timecounter.h> +#include <linux/net_tstamp.h> +#include <linux/ptp_clock_kernel.h> +#include <linux/crash_dump.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/port.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/transobj.h> +#include <linux/mlx5/fs.h> +#include <linux/rhashtable.h> +#include <net/switchdev.h> +#include <net/xdp.h> +#include <linux/net_dim.h> +#include "wq.h" +#include "mlx5_core.h" +#include "en_stats.h" +#include "en/fs.h" + +extern const struct net_device_ops mlx5e_netdev_ops; +struct page_pool; + +#define MLX5E_METADATA_ETHER_TYPE (0x8CE4) +#define MLX5E_METADATA_ETHER_LEN 8 + +#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) + +#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) + +#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) +#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu)) + +#define MLX5E_MAX_PRIORITY 8 +#define MLX5E_MAX_DSCP 64 +#define MLX5E_MAX_NUM_TC 8 + +#define MLX5_RX_HEADROOM NET_SKB_PAD +#define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + +#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ + (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ +#define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \ + max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) +#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6) +#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8) +#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \ + (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \ + MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)) + +#define MLX5_MPWRQ_LOG_WQE_SZ 18 +#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ + MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) +#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) + +#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) +#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8)) +#define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS)) +#define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) +#define MLX5E_MAX_RQ_NUM_MTTS \ + ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ +#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024)) +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW \ + (ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS)) +#define MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW \ + (MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW + \ + (MLX5_MPWRQ_LOG_WQE_SZ - MLX5E_ORDER2_MAX_PACKET_MTU)) + +#define MLX5E_MIN_SKB_FRAG_SZ (MLX5_SKB_FRAG_SZ(MLX5_RX_HEADROOM)) +#define MLX5E_LOG_MAX_RX_WQE_BULK \ + (ilog2(PAGE_SIZE / roundup_pow_of_two(MLX5E_MIN_SKB_FRAG_SZ))) + +#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6 +#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd + +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE (1 + MLX5E_LOG_MAX_RX_WQE_BULK) +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE min_t(u8, 0xd, \ + MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW) + +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 + +#define MLX5E_RX_MAX_HEAD (256) + +#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) +#define MLX5E_DEFAULT_LRO_TIMEOUT 32 +#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 + +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE 0x10 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 + +#define MLX5E_LOG_INDIR_RQT_SIZE 0x7 +#define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) +#define MLX5E_MIN_NUM_CHANNELS 0x1 +#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) +#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) +#define MLX5E_TX_CQ_POLL_BUDGET 128 +#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ + +#define MLX5E_UMR_WQE_INLINE_SZ \ + (sizeof(struct mlx5e_umr_wqe) + \ + ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(struct mlx5_mtt), \ + MLX5_UMR_MTT_ALIGNMENT)) +#define MLX5E_UMR_WQEBBS \ + (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB)) +#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS + +#define MLX5E_NUM_MAIN_GROUPS 9 + +#define MLX5E_MSG_LEVEL NETIF_MSG_LINK + +#define mlx5e_dbg(mlevel, priv, format, ...) \ +do { \ + if (NETIF_MSG_##mlevel & (priv)->msglevel) \ + netdev_warn(priv->netdev, format, \ + ##__VA_ARGS__); \ +} while (0) + + +static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW, + wq_size / 2); + default: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES, + wq_size / 2); + } +} + +static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) +{ + return is_kdump_kernel() ? + MLX5E_MIN_NUM_CHANNELS : + min_t(int, mdev->priv.eq_table.num_comp_vectors, + MLX5E_MAX_NUM_CHANNELS); +} + +struct mlx5e_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_eth_seg eth; + struct mlx5_wqe_data_seg data[0]; +}; + +struct mlx5e_rx_wqe_ll { + struct mlx5_wqe_srq_next_seg next; + struct mlx5_wqe_data_seg data[0]; +}; + +struct mlx5e_rx_wqe_cyc { + struct mlx5_wqe_data_seg data[0]; +}; + +struct mlx5e_umr_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_umr_ctrl_seg uctrl; + struct mlx5_mkey_seg mkc; + struct mlx5_mtt inline_mtts[0]; +}; + +extern const char mlx5e_self_tests[][ETH_GSTRING_LEN]; + +static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { + "rx_cqe_moder", + "tx_cqe_moder", + "rx_cqe_compress", + "rx_striding_rq", + "rx_no_csum_complete", +}; + +enum mlx5e_priv_flag { + MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0), + MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1), + MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2), + MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3), + MLX5E_PFLAG_RX_NO_CSUM_COMPLETE = (1 << 4), +}; + +#define MLX5E_SET_PFLAG(params, pflag, enable) \ + do { \ + if (enable) \ + (params)->pflags |= (pflag); \ + else \ + (params)->pflags &= ~(pflag); \ + } while (0) + +#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag))) + +#ifdef CONFIG_MLX5_CORE_EN_DCB +#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ +#endif + +struct mlx5e_params { + u8 log_sq_size; + u8 rq_wq_type; + u8 log_rq_mtu_frames; + u16 num_channels; + u8 num_tc; + bool rx_cqe_compress_def; + struct net_dim_cq_moder rx_cq_moderation; + struct net_dim_cq_moder tx_cq_moderation; + bool lro_en; + u32 lro_wqe_sz; + u8 tx_min_inline_mode; + u8 rss_hfunc; + u8 toeplitz_hash_key[40]; + u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; + bool vlan_strip_disable; + bool scatter_fcs_en; + bool rx_dim_enabled; + bool tx_dim_enabled; + u32 lro_timeout; + u32 pflags; + struct bpf_prog *xdp_prog; + unsigned int sw_mtu; + int hard_mtu; +}; + +#ifdef CONFIG_MLX5_CORE_EN_DCB +struct mlx5e_cee_config { + /* bw pct for priority group */ + u8 pg_bw_pct[CEE_DCBX_MAX_PGS]; + u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO]; + bool pfc_setting[CEE_DCBX_MAX_PRIO]; + bool pfc_enable; +}; + +enum { + MLX5_DCB_CHG_RESET, + MLX5_DCB_NO_CHG, + MLX5_DCB_CHG_NO_RESET, +}; + +struct mlx5e_dcbx { + enum mlx5_dcbx_oper_mode mode; + struct mlx5e_cee_config cee_cfg; /* pending configuration */ + u8 dscp_app_cnt; + + /* The only setting that cannot be read from FW */ + u8 tc_tsa[IEEE_8021QAZ_MAX_TCS]; + u8 cap; + + /* Buffer configuration */ + bool manual_buffer; + u32 cable_len; + u32 xoff; +}; + +struct mlx5e_dcbx_dp { + u8 dscp2prio[MLX5E_MAX_DSCP]; + u8 trust_state; +}; +#endif + +enum { + MLX5E_RQ_STATE_ENABLED, + MLX5E_RQ_STATE_AM, + MLX5E_RQ_STATE_NO_CSUM_COMPLETE, +}; + +struct mlx5e_cq { + /* data path - accessed per cqe */ + struct mlx5_cqwq wq; + + /* data path - accessed per napi poll */ + u16 event_ctr; + struct napi_struct *napi; + struct mlx5_core_cq mcq; + struct mlx5e_channel *channel; + + /* cqe decompression */ + struct mlx5_cqe64 title; + struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE]; + u8 mini_arr_idx; + u16 decmprs_left; + u16 decmprs_wqe_counter; + + /* control */ + struct mlx5_core_dev *mdev; + struct mlx5_wq_ctrl wq_ctrl; +} ____cacheline_aligned_in_smp; + +struct mlx5e_tx_wqe_info { + struct sk_buff *skb; + u32 num_bytes; + u8 num_wqebbs; + u8 num_dma; +}; + +enum mlx5e_dma_map_type { + MLX5E_DMA_MAP_SINGLE, + MLX5E_DMA_MAP_PAGE +}; + +struct mlx5e_sq_dma { + dma_addr_t addr; + u32 size; + enum mlx5e_dma_map_type type; +}; + +enum { + MLX5E_SQ_STATE_ENABLED, + MLX5E_SQ_STATE_RECOVERING, + MLX5E_SQ_STATE_IPSEC, + MLX5E_SQ_STATE_AM, + MLX5E_SQ_STATE_TLS, + MLX5E_SQ_STATE_REDIRECT, +}; + +struct mlx5e_sq_wqe_info { + u8 opcode; +}; + +struct mlx5e_txqsq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + u32 dma_fifo_cc; + struct net_dim dim; /* Adaptive Moderation */ + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + u32 dma_fifo_pc; + + struct mlx5e_cq cq; + + /* read only */ + struct mlx5_wq_cyc wq; + u32 dma_fifo_mask; + struct mlx5e_sq_stats *stats; + struct { + struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_tx_wqe_info *wqe_info; + } db; + void __iomem *uar_map; + struct netdev_queue *txq; + u32 sqn; + u8 min_inline_mode; + struct device *pdev; + __be32 mkey_be; + unsigned long state; + struct hwtstamp_config *tstamp; + struct mlx5_clock *clock; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; + int txq_ix; + u32 rate_limit; + struct mlx5e_txqsq_recover { + struct work_struct recover_work; + u64 last_recover; + } recover; +} ____cacheline_aligned_in_smp; + +struct mlx5e_dma_info { + struct page *page; + dma_addr_t addr; +}; + +struct mlx5e_xdp_info { + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + struct mlx5e_dma_info di; +}; + +struct mlx5e_xdpsq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + bool redirect_flush; + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + bool doorbell; + + struct mlx5e_cq cq; + + /* read only */ + struct mlx5_wq_cyc wq; + struct mlx5e_xdpsq_stats *stats; + struct { + struct mlx5e_xdp_info *xdpi; + } db; + void __iomem *uar_map; + u32 sqn; + struct device *pdev; + __be32 mkey_be; + u8 min_inline_mode; + unsigned long state; + unsigned int hw_mtu; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; +} ____cacheline_aligned_in_smp; + +struct mlx5e_icosq { + /* data path */ + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + + struct mlx5e_cq cq; + + /* write@xmit, read@completion */ + struct { + struct mlx5e_sq_wqe_info *ico_wqe; + } db; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + u32 sqn; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; +} ____cacheline_aligned_in_smp; + +static inline bool +mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) +{ + return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc); +} + +struct mlx5e_wqe_frag_info { + struct mlx5e_dma_info *di; + u32 offset; + bool last_in_page; +}; + +struct mlx5e_umr_dma_info { + struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; +}; + +struct mlx5e_mpw_info { + struct mlx5e_umr_dma_info umr; + u16 consumed_strides; + DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); +}; + +#define MLX5E_MAX_RX_FRAGS 4 + +/* a single cache unit is capable to serve one napi call (for non-striding rq) + * or a MPWQE (for striding rq). + */ +#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ + MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT) +#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) +struct mlx5e_page_cache { + u32 head; + u32 tail; + struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE]; +}; + +struct mlx5e_rq; +typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); +typedef struct sk_buff * +(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx); +typedef struct sk_buff * +(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); +typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); +typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); + +enum mlx5e_rq_flag { + MLX5E_RQ_FLAG_XDP_XMIT = BIT(0), +}; + +struct mlx5e_rq_frag_info { + int frag_size; + int frag_stride; +}; + +struct mlx5e_rq_frags_info { + struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS]; + u8 num_frags; + u8 log_num_frags; + u8 wqe_bulk; +}; + +struct mlx5e_rq { + /* data path */ + union { + struct { + struct mlx5_wq_cyc wq; + struct mlx5e_wqe_frag_info *frags; + struct mlx5e_dma_info *di; + struct mlx5e_rq_frags_info info; + mlx5e_fp_skb_from_cqe skb_from_cqe; + } wqe; + struct { + struct mlx5_wq_ll wq; + struct mlx5e_umr_wqe umr_wqe; + struct mlx5e_mpw_info *info; + mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq; + u16 num_strides; + u8 log_stride_sz; + bool umr_in_progress; + } mpwqe; + }; + struct { + u16 headroom; + u8 map_dir; /* dma map direction */ + } buff; + + struct mlx5e_channel *channel; + struct device *pdev; + struct net_device *netdev; + struct mlx5e_rq_stats *stats; + struct mlx5e_cq cq; + struct mlx5e_page_cache page_cache; + struct hwtstamp_config *tstamp; + struct mlx5_clock *clock; + + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_post_rx_wqes post_wqes; + mlx5e_fp_dealloc_wqe dealloc_wqe; + + unsigned long state; + int ix; + unsigned int hw_mtu; + + struct net_dim dim; /* Dynamic Interrupt Moderation */ + + /* XDP */ + struct bpf_prog *xdp_prog; + struct mlx5e_xdpsq xdpsq; + DECLARE_BITMAP(flags, 8); + struct page_pool *page_pool; + + /* control */ + struct mlx5_wq_ctrl wq_ctrl; + __be32 mkey_be; + u8 wq_type; + u32 rqn; + struct mlx5_core_dev *mdev; + struct mlx5_core_mkey umr_mkey; + + /* XDP read-mostly */ + struct xdp_rxq_info xdp_rxq; +} ____cacheline_aligned_in_smp; + +struct mlx5e_channel { + /* data path */ + struct mlx5e_rq rq; + struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_icosq icosq; /* internal control operations */ + bool xdp; + struct napi_struct napi; + struct device *pdev; + struct net_device *netdev; + __be32 mkey_be; + u8 num_tc; + + /* XDP_REDIRECT */ + struct mlx5e_xdpsq xdpsq; + + /* data path - accessed per napi poll */ + struct irq_desc *irq_desc; + struct mlx5e_ch_stats *stats; + + /* control */ + struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; + struct hwtstamp_config *tstamp; + int ix; + int cpu; +}; + +struct mlx5e_channels { + struct mlx5e_channel **c; + unsigned int num; + struct mlx5e_params params; +}; + +struct mlx5e_channel_stats { + struct mlx5e_ch_stats ch; + struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq_stats rq; + struct mlx5e_xdpsq_stats rq_xdpsq; + struct mlx5e_xdpsq_stats xdpsq; +} ____cacheline_aligned_in_smp; + +enum { + MLX5E_STATE_ASYNC_EVENTS_ENABLED, + MLX5E_STATE_OPENED, + MLX5E_STATE_DESTROYING, + MLX5E_STATE_XDP_TX_ENABLED, +}; + +struct mlx5e_rqt { + u32 rqtn; + bool enabled; +}; + +struct mlx5e_tir { + u32 tirn; + struct mlx5e_rqt rqt; + struct list_head list; +}; + +enum { + MLX5E_TC_PRIO = 0, + MLX5E_NIC_PRIO +}; + +struct mlx5e_priv { + /* priv data path fields - start */ + struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; + int channel_tc2txq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; +#ifdef CONFIG_MLX5_CORE_EN_DCB + struct mlx5e_dcbx_dp dcbx_dp; +#endif + /* priv data path fields - end */ + + u32 msglevel; + unsigned long state; + struct mutex state_lock; /* Protects Interface state */ + struct mlx5e_rq drop_rq; + + struct mlx5e_channels channels; + u32 tisn[MLX5E_MAX_NUM_TC]; + struct mlx5e_rqt indir_rqt; + struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + u32 tx_rates[MLX5E_MAX_NUM_SQS]; + + struct mlx5e_flow_steering fs; + + struct workqueue_struct *wq; + struct work_struct update_carrier_work; + struct work_struct set_rx_mode_work; + struct work_struct tx_timeout_work; + struct delayed_work update_stats_work; + + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_stats stats; + struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS]; + u8 max_opened_tc; + struct hwtstamp_config tstamp; + u16 q_counter; + u16 drop_rq_q_counter; +#ifdef CONFIG_MLX5_CORE_EN_DCB + struct mlx5e_dcbx dcbx; +#endif + + const struct mlx5e_profile *profile; + void *ppriv; +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5e_ipsec *ipsec; +#endif +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5e_tls *tls; +#endif +}; + +struct mlx5e_profile { + void (*init)(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, void *ppriv); + void (*cleanup)(struct mlx5e_priv *priv); + int (*init_rx)(struct mlx5e_priv *priv); + void (*cleanup_rx)(struct mlx5e_priv *priv); + int (*init_tx)(struct mlx5e_priv *priv); + void (*cleanup_tx)(struct mlx5e_priv *priv); + void (*enable)(struct mlx5e_priv *priv); + void (*disable)(struct mlx5e_priv *priv); + void (*update_stats)(struct mlx5e_priv *priv); + void (*update_carrier)(struct mlx5e_priv *priv); + int (*max_nch)(struct mlx5_core_dev *mdev); + struct { + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; + } rx_handlers; + int max_tc; +}; + +void mlx5e_build_ptys2ethtool_map(void); + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback); +netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_tx_wqe *wqe, u16 pi); + +void mlx5e_completion_event(struct mlx5_core_cq *mcq); +void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); +int mlx5e_napi_poll(struct napi_struct *napi, int budget); +bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); +int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); + +bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); +bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); + +void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info); +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle); +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq); +void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); +void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); +struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx); +struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx); +struct sk_buff * +mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); +struct sk_buff * +mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); + +void mlx5e_update_stats(struct mlx5e_priv *priv); + +void mlx5e_init_l2_addr(struct mlx5e_priv *priv); +int mlx5e_self_test_num(struct mlx5e_priv *priv); +void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest, + u64 *buf); +void mlx5e_set_rx_mode_work(struct work_struct *work); + +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid); +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid); +void mlx5e_timestamp_init(struct mlx5e_priv *priv); + +struct mlx5e_redirect_rqt_param { + bool is_rss; + union { + u32 rqn; /* Direct RQN (Non-RSS) */ + struct { + u8 hfunc; + struct mlx5e_channels *channels; + } rss; /* RSS data */ + }; +}; + +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, + struct mlx5e_redirect_rqt_param rrp); +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, + enum mlx5e_traffic_types tt, + void *tirc, bool inner); + +int mlx5e_open_locked(struct net_device *netdev); +int mlx5e_close_locked(struct net_device *netdev); + +int mlx5e_open_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs); +void mlx5e_close_channels(struct mlx5e_channels *chs); + +/* Function pointer to be used to modify WH settings while + * switching channels + */ +typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv); +void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify); +void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); +void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); + +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, + int num_channels); +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, + u8 cq_period_mode); +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, + u8 cq_period_mode); +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); + +static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); +} + +static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe **wqe, + u16 *pi) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + + *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + *wqe = mlx5_wq_cyc_get_wqe(wq, *pi); + memset(*wqe, 0, sizeof(**wqe)); +} + +static inline +struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + + (*pc)++; + + return wqe; +} + +static inline +void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, + void __iomem *uar_map, + struct mlx5_wqe_ctrl_seg *ctrl) +{ + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + *wq->db = cpu_to_be32(pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)ctrl, uar_map, NULL); +} + +static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) +{ + struct mlx5_core_cq *mcq; + + mcq = &cq->mcq; + mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); +} + +extern const struct ethtool_ops mlx5e_ethtool_ops; +#ifdef CONFIG_MLX5_CORE_EN_DCB +extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; +int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets); +void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv); +void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv); +void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv); +#endif + +int mlx5e_create_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir, u32 *in, int inlen); +void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir); +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb); + +/* common netdev helpers */ +int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); + +int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); + +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); + +int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, + u32 underlay_qpn, u32 *tisn); +void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); + +int mlx5e_create_tises(struct mlx5e_priv *priv); +void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); +int mlx5e_close(struct net_device *netdev); +int mlx5e_open(struct net_device *netdev); +void mlx5e_update_stats_work(struct work_struct *work); + +int mlx5e_bits_invert(unsigned long a, int size); + +typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv); +int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, + change_hw_mtu_cb set_mtu_cb); + +/* ethtool helpers */ +void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, + struct ethtool_drvinfo *drvinfo); +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, + uint32_t stringset, uint8_t *data); +int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset); +void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, + struct ethtool_stats *stats, u64 *data); +void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param); +int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param); +void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch); +int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch); +int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal); +int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal); +int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, + struct ethtool_ts_info *info); +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash); + +/* mlx5e generic netdev management API */ +struct net_device* +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, + void *ppriv); +int mlx5e_attach_netdev(struct mlx5e_priv *priv); +void mlx5e_detach_netdev(struct mlx5e_priv *priv); +void mlx5e_destroy_netdev(struct mlx5e_priv *priv); +void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 max_channels, u16 mtu); +u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); +void mlx5e_rx_dim_work(struct work_struct *work); +void mlx5e_tx_dim_work(struct work_struct *work); +#endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile new file mode 100644 index 000000000..d8e17110f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h new file mode 100644 index 000000000..1431232c9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5E_FLOW_STEER_H__ +#define __MLX5E_FLOW_STEER_H__ + +enum { + MLX5E_TC_FT_LEVEL = 0, + MLX5E_TC_TTC_FT_LEVEL, +}; + +struct mlx5e_tc_table { + struct mlx5_flow_table *t; + + struct rhashtable ht; + + DECLARE_HASHTABLE(mod_hdr_tbl, 8); + DECLARE_HASHTABLE(hairpin_tbl, 8); + + struct notifier_block netdevice_nb; +}; + +struct mlx5e_flow_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; +}; + +struct mlx5e_l2_rule { + u8 addr[ETH_ALEN + 2]; + struct mlx5_flow_handle *rule; +}; + +#define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE) + +struct mlx5e_vlan_table { + struct mlx5e_flow_table ft; + DECLARE_BITMAP(active_cvlans, VLAN_N_VID); + DECLARE_BITMAP(active_svlans, VLAN_N_VID); + struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *untagged_rule; + struct mlx5_flow_handle *any_cvlan_rule; + struct mlx5_flow_handle *any_svlan_rule; + bool cvlan_filter_disabled; +}; + +struct mlx5e_l2_table { + struct mlx5e_flow_table ft; + struct hlist_head netdev_uc[MLX5E_L2_ADDR_HASH_SIZE]; + struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE]; + struct mlx5e_l2_rule broadcast; + struct mlx5e_l2_rule allmulti; + struct mlx5e_l2_rule promisc; + bool broadcast_enabled; + bool allmulti_enabled; + bool promisc_enabled; +}; + +enum mlx5e_traffic_types { + MLX5E_TT_IPV4_TCP, + MLX5E_TT_IPV6_TCP, + MLX5E_TT_IPV4_UDP, + MLX5E_TT_IPV6_UDP, + MLX5E_TT_IPV4_IPSEC_AH, + MLX5E_TT_IPV6_IPSEC_AH, + MLX5E_TT_IPV4_IPSEC_ESP, + MLX5E_TT_IPV6_IPSEC_ESP, + MLX5E_TT_IPV4, + MLX5E_TT_IPV6, + MLX5E_TT_ANY, + MLX5E_NUM_TT, + MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, +}; + +enum mlx5e_tunnel_types { + MLX5E_TT_IPV4_GRE, + MLX5E_TT_IPV6_GRE, + MLX5E_NUM_TUNNEL_TT, +}; + +/* L3/L4 traffic type classifier */ +struct mlx5e_ttc_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_handle *rules[MLX5E_NUM_TT]; + struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT]; +}; + +/* NIC prio FTS */ +enum { + MLX5E_VLAN_FT_LEVEL = 0, + MLX5E_L2_FT_LEVEL, + MLX5E_TTC_FT_LEVEL, + MLX5E_INNER_TTC_FT_LEVEL, +#ifdef CONFIG_MLX5_EN_ARFS + MLX5E_ARFS_FT_LEVEL +#endif +}; + +#ifdef CONFIG_MLX5_EN_RXNFC + +struct mlx5e_ethtool_table { + struct mlx5_flow_table *ft; + int num_rules; +}; + +#define ETHTOOL_NUM_L3_L4_FTS 7 +#define ETHTOOL_NUM_L2_FTS 4 + +struct mlx5e_ethtool_steering { + struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS]; + struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS]; + struct list_head rules; + int tot_num_rules; +}; + +void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv); +void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv); +int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd); +int mlx5e_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *info, u32 *rule_locs); +#else +static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) { } +static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { } +#endif /* CONFIG_MLX5_EN_RXNFC */ + +#ifdef CONFIG_MLX5_EN_ARFS +#define ARFS_HASH_SHIFT BITS_PER_BYTE +#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE) + +struct arfs_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_handle *default_rule; + struct hlist_head rules_hash[ARFS_HASH_SIZE]; +}; + +enum arfs_type { + ARFS_IPV4_TCP, + ARFS_IPV6_TCP, + ARFS_IPV4_UDP, + ARFS_IPV6_UDP, + ARFS_NUM_TYPES, +}; + +struct mlx5e_arfs_tables { + struct arfs_table arfs_tables[ARFS_NUM_TYPES]; + /* Protect aRFS rules list */ + spinlock_t arfs_lock; + struct list_head rules; + int last_filter_id; + struct workqueue_struct *wq; +}; + +int mlx5e_arfs_create_tables(struct mlx5e_priv *priv); +void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv); +int mlx5e_arfs_enable(struct mlx5e_priv *priv); +int mlx5e_arfs_disable(struct mlx5e_priv *priv); +int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id); +#else +static inline int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {} +static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; } +static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; } +#endif + +struct mlx5e_flow_steering { + struct mlx5_flow_namespace *ns; +#ifdef CONFIG_MLX5_EN_RXNFC + struct mlx5e_ethtool_steering ethtool; +#endif + struct mlx5e_tc_table tc; + struct mlx5e_vlan_table vlan; + struct mlx5e_l2_table l2; + struct mlx5e_ttc_table ttc; + struct mlx5e_ttc_table inner_ttc; +#ifdef CONFIG_MLX5_EN_ARFS + struct mlx5e_arfs_tables arfs; +#endif +}; + +struct ttc_params { + struct mlx5_flow_table_attr ft_attr; + u32 any_tt_tirn; + u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_ttc_table *inner_ttc; +}; + +void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params); +void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params); +void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params); + +int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc); +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc); + +int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc); +void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc); + +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); + +void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv); +void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); + +int mlx5e_create_flow_steering(struct mlx5e_priv *priv); +void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); + +#endif /* __MLX5E_FLOW_STEER_H__ */ + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c new file mode 100644 index 000000000..12e1682f9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "port.h" + +/* speed in units of 1Mb */ +static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { + [MLX5E_1000BASE_CX_SGMII] = 1000, + [MLX5E_1000BASE_KX] = 1000, + [MLX5E_10GBASE_CX4] = 10000, + [MLX5E_10GBASE_KX4] = 10000, + [MLX5E_10GBASE_KR] = 10000, + [MLX5E_20GBASE_KR2] = 20000, + [MLX5E_40GBASE_CR4] = 40000, + [MLX5E_40GBASE_KR4] = 40000, + [MLX5E_56GBASE_R4] = 56000, + [MLX5E_10GBASE_CR] = 10000, + [MLX5E_10GBASE_SR] = 10000, + [MLX5E_10GBASE_ER] = 10000, + [MLX5E_40GBASE_SR4] = 40000, + [MLX5E_40GBASE_LR4] = 40000, + [MLX5E_50GBASE_SR2] = 50000, + [MLX5E_100GBASE_CR4] = 100000, + [MLX5E_100GBASE_SR4] = 100000, + [MLX5E_100GBASE_KR4] = 100000, + [MLX5E_100GBASE_LR4] = 100000, + [MLX5E_100BASE_TX] = 100, + [MLX5E_1000BASE_T] = 1000, + [MLX5E_10GBASE_T] = 10000, + [MLX5E_25GBASE_CR] = 25000, + [MLX5E_25GBASE_KR] = 25000, + [MLX5E_25GBASE_SR] = 25000, + [MLX5E_50GBASE_CR2] = 50000, + [MLX5E_50GBASE_KR2] = 50000, +}; + +u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) +{ + unsigned long temp = eth_proto_oper; + u32 speed = 0; + int i; + + i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER); + if (i < MLX5E_LINK_MODES_NUMBER) + speed = mlx5e_link_speed[i]; + + return speed; +} + +int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; + u32 eth_proto_oper; + int err; + + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); + if (err) + return err; + + eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + *speed = mlx5e_port_ptys2speed(eth_proto_oper); + if (!(*speed)) + err = -EINVAL; + + return err; +} + +int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) +{ + u32 max_speed = 0; + u32 proto_cap; + int err; + int i; + + err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN); + if (err) + return err; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) + if (proto_cap & MLX5E_PROT_MASK(i)) + max_speed = max(max_speed, mlx5e_link_speed[i]); + + *speed = max_speed; + return 0; +} + +u32 mlx5e_port_speed2linkmodes(u32 speed) +{ + u32 link_modes = 0; + int i; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (mlx5e_link_speed[i] == speed) + link_modes |= MLX5E_PROT_MASK(i); + } + + return link_modes; +} + +int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out) +{ + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + void *in; + int err; + + in = kzalloc(sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(pbmc_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 0); + + kfree(in); + return err; +} + +int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in) +{ + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + void *out; + int err; + + out = kzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(pbmc_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 1); + + kfree(out); + return err; +} + +/* buffer[i]: buffer that priority i mapped to */ +int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer) +{ + int sz = MLX5_ST_SZ_BYTES(pptb_reg); + u32 prio_x_buff; + void *out; + void *in; + int prio; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(pptb_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0); + if (err) + goto out; + + prio_x_buff = MLX5_GET(pptb_reg, out, prio_x_buff); + for (prio = 0; prio < 8; prio++) { + buffer[prio] = (u8)(prio_x_buff >> (4 * prio)) & 0xF; + mlx5_core_dbg(mdev, "prio %d, buffer %d\n", prio, buffer[prio]); + } +out: + kfree(in); + kfree(out); + return err; +} + +int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer) +{ + int sz = MLX5_ST_SZ_BYTES(pptb_reg); + u32 prio_x_buff; + void *out; + void *in; + int prio; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + /* First query the pptb register */ + MLX5_SET(pptb_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0); + if (err) + goto out; + + memcpy(in, out, sz); + MLX5_SET(pptb_reg, in, local_port, 1); + + /* Update the pm and prio_x_buff */ + MLX5_SET(pptb_reg, in, pm, 0xFF); + + prio_x_buff = 0; + for (prio = 0; prio < 8; prio++) + prio_x_buff |= (buffer[prio] << (4 * prio)); + MLX5_SET(pptb_reg, in, prio_x_buff, prio_x_buff); + + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 1); + +out: + kfree(in); + kfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h new file mode 100644 index 000000000..f8cbd8194 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_EN_PORT_H +#define __MLX5E_EN_PORT_H + +#include <linux/mlx5/driver.h> +#include "en.h" + +u32 mlx5e_port_ptys2speed(u32 eth_proto_oper); +int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); +int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); +u32 mlx5e_port_speed2linkmodes(u32 speed); + +int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); +int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); +int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); +int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c new file mode 100644 index 000000000..28d56e44e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "port_buffer.h" + +int mlx5e_port_query_buffer(struct mlx5e_priv *priv, + struct mlx5e_port_buffer *port_buffer) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + u32 total_used = 0; + void *buffer; + void *out; + int err; + int i; + + out = kzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5e_port_query_pbmc(mdev, out); + if (err) + goto out; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]); + port_buffer->buffer[i].lossy = + MLX5_GET(bufferx_reg, buffer, lossy); + port_buffer->buffer[i].epsb = + MLX5_GET(bufferx_reg, buffer, epsb); + port_buffer->buffer[i].size = + MLX5_GET(bufferx_reg, buffer, size) << MLX5E_BUFFER_CELL_SHIFT; + port_buffer->buffer[i].xon = + MLX5_GET(bufferx_reg, buffer, xon_threshold) << MLX5E_BUFFER_CELL_SHIFT; + port_buffer->buffer[i].xoff = + MLX5_GET(bufferx_reg, buffer, xoff_threshold) << MLX5E_BUFFER_CELL_SHIFT; + total_used += port_buffer->buffer[i].size; + + mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i, + port_buffer->buffer[i].size, + port_buffer->buffer[i].xon, + port_buffer->buffer[i].xoff, + port_buffer->buffer[i].epsb, + port_buffer->buffer[i].lossy); + } + + port_buffer->port_buffer_size = + MLX5_GET(pbmc_reg, out, port_buffer_size) << MLX5E_BUFFER_CELL_SHIFT; + port_buffer->spare_buffer_size = + port_buffer->port_buffer_size - total_used; + + mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n", + port_buffer->port_buffer_size, + port_buffer->spare_buffer_size); +out: + kfree(out); + return err; +} + +static int port_set_buffer(struct mlx5e_priv *priv, + struct mlx5e_port_buffer *port_buffer) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + void *buffer; + void *in; + int err; + int i; + + in = kzalloc(sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + err = mlx5e_port_query_pbmc(mdev, in); + if (err) + goto out; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); + + MLX5_SET(bufferx_reg, buffer, size, + port_buffer->buffer[i].size >> MLX5E_BUFFER_CELL_SHIFT); + MLX5_SET(bufferx_reg, buffer, lossy, + port_buffer->buffer[i].lossy); + MLX5_SET(bufferx_reg, buffer, xoff_threshold, + port_buffer->buffer[i].xoff >> MLX5E_BUFFER_CELL_SHIFT); + MLX5_SET(bufferx_reg, buffer, xon_threshold, + port_buffer->buffer[i].xon >> MLX5E_BUFFER_CELL_SHIFT); + } + + err = mlx5e_port_set_pbmc(mdev, in); +out: + kfree(in); + return err; +} + +/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) + * minimum speed value is 40Gbps + */ +static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) +{ + u32 speed; + u32 xoff; + int err; + + err = mlx5e_port_linkspeed(priv->mdev, &speed); + if (err) + speed = SPEED_40000; + speed = max_t(u32, speed, SPEED_40000); + + xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; + + mlx5e_dbg(HW, priv, "%s: xoff=%d\n", __func__, xoff); + return xoff; +} + +static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, + u32 xoff, unsigned int max_mtu) +{ + int i; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + if (port_buffer->buffer[i].lossy) { + port_buffer->buffer[i].xoff = 0; + port_buffer->buffer[i].xon = 0; + continue; + } + + if (port_buffer->buffer[i].size < + (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) { + pr_err("buffer_size[%d]=%d is not enough for lossless buffer\n", + i, port_buffer->buffer[i].size); + return -ENOMEM; + } + + port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff; + port_buffer->buffer[i].xon = + port_buffer->buffer[i].xoff - max_mtu; + } + + return 0; +} + +/** + * update_buffer_lossy() + * max_mtu: netdev's max_mtu + * pfc_en: <input> current pfc configuration + * buffer: <input> current prio to buffer mapping + * xoff: <input> xoff value + * port_buffer: <output> port receive buffer configuration + * change: <output> + * + * Update buffer configuration based on pfc configuraiton and priority + * to buffer mapping. + * Buffer's lossy bit is changed to: + * lossless if there is at least one PFC enabled priority mapped to this buffer + * lossy if all priorities mapped to this buffer are PFC disabled + * + * Return: + * Return 0 if no error. + * Set change to true if buffer configuration is modified. + */ +static int update_buffer_lossy(unsigned int max_mtu, + u8 pfc_en, u8 *buffer, u32 xoff, + struct mlx5e_port_buffer *port_buffer, + bool *change) +{ + bool changed = false; + u8 lossy_count; + u8 prio_count; + u8 lossy; + int prio; + int err; + int i; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + prio_count = 0; + lossy_count = 0; + + for (prio = 0; prio < MLX5E_MAX_PRIORITY; prio++) { + if (buffer[prio] != i) + continue; + + prio_count++; + lossy_count += !(pfc_en & (1 << prio)); + } + + if (lossy_count == prio_count) + lossy = 1; + else /* lossy_count < prio_count */ + lossy = 0; + + if (lossy != port_buffer->buffer[i].lossy) { + port_buffer->buffer[i].lossy = lossy; + changed = true; + } + } + + if (changed) { + err = update_xoff_threshold(port_buffer, xoff, max_mtu); + if (err) + return err; + + *change = true; + } + + return 0; +} + +static int fill_pfc_en(struct mlx5_core_dev *mdev, u8 *pfc_en) +{ + u32 g_rx_pause, g_tx_pause; + int err; + + err = mlx5_query_port_pause(mdev, &g_rx_pause, &g_tx_pause); + if (err) + return err; + + /* If global pause enabled, set all active buffers to lossless. + * Otherwise, check PFC setting. + */ + if (g_rx_pause || g_tx_pause) + *pfc_en = 0xff; + else + err = mlx5_query_port_pfc(mdev, pfc_en, NULL); + + return err; +} + +#define MINIMUM_MAX_MTU 9216 +int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + u32 change, unsigned int mtu, + struct ieee_pfc *pfc, + u32 *buffer_size, + u8 *prio2buffer) +{ + struct mlx5e_port_buffer port_buffer; + u32 xoff = calculate_xoff(priv, mtu); + bool update_prio2buffer = false; + u8 buffer[MLX5E_MAX_PRIORITY]; + bool update_buffer = false; + unsigned int max_mtu; + u32 total_used = 0; + u8 curr_pfc_en; + int err; + int i; + + mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change); + max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU); + + err = mlx5e_port_query_buffer(priv, &port_buffer); + if (err) + return err; + + if (change & MLX5E_PORT_BUFFER_CABLE_LEN) { + update_buffer = true; + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); + if (err) + return err; + } + + if (change & MLX5E_PORT_BUFFER_PFC) { + err = mlx5e_port_query_priority2buffer(priv->mdev, buffer); + if (err) + return err; + + err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, + &port_buffer, &update_buffer); + if (err) + return err; + } + + if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) { + update_prio2buffer = true; + err = fill_pfc_en(priv->mdev, &curr_pfc_en); + if (err) + return err; + + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, + xoff, &port_buffer, &update_buffer); + if (err) + return err; + } + + if (change & MLX5E_PORT_BUFFER_SIZE) { + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]); + if (!port_buffer.buffer[i].lossy && !buffer_size[i]) { + mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n", + __func__, i); + return -EINVAL; + } + + port_buffer.buffer[i].size = buffer_size[i]; + total_used += buffer_size[i]; + } + + mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used); + + if (total_used > port_buffer.port_buffer_size) + return -EINVAL; + + update_buffer = true; + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); + if (err) + return err; + } + + /* Need to update buffer configuration if xoff value is changed */ + if (!update_buffer && xoff != priv->dcbx.xoff) { + update_buffer = true; + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); + if (err) + return err; + } + priv->dcbx.xoff = xoff; + + /* Apply the settings */ + if (update_buffer) { + err = port_set_buffer(priv, &port_buffer); + if (err) + return err; + } + + if (update_prio2buffer) + err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h new file mode 100644 index 000000000..34f55b81a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_PORT_BUFFER_H__ +#define __MLX5_EN_PORT_BUFFER_H__ + +#include "en.h" +#include "port.h" + +#define MLX5E_MAX_BUFFER 8 +#define MLX5E_BUFFER_CELL_SHIFT 7 +#define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */ + +#define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \ + MLX5_CAP_PCAM_REG(mdev, pbmc) && \ + MLX5_CAP_PCAM_REG(mdev, pptb)) + +enum { + MLX5E_PORT_BUFFER_CABLE_LEN = BIT(0), + MLX5E_PORT_BUFFER_PFC = BIT(1), + MLX5E_PORT_BUFFER_PRIO2BUFFER = BIT(2), + MLX5E_PORT_BUFFER_SIZE = BIT(3), +}; + +struct mlx5e_bufferx_reg { + u8 lossy; + u8 epsb; + u32 size; + u32 xoff; + u32 xon; +}; + +struct mlx5e_port_buffer { + u32 port_buffer_size; + u32 spare_buffer_size; + struct mlx5e_bufferx_reg buffer[MLX5E_MAX_BUFFER]; +}; + +int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + u32 change, unsigned int mtu, + struct ieee_pfc *pfc, + u32 *buffer_size, + u8 *prio2buffer); + +int mlx5e_port_query_buffer(struct mlx5e_priv *priv, + struct mlx5e_port_buffer *port_buffer); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c new file mode 100644 index 000000000..599114ab7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/bpf_trace.h> +#include "en/xdp.h" + +int mlx5e_xdp_max_mtu(struct mlx5e_params *params) +{ + int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM; + + /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). + * The condition checked in mlx5e_rx_is_linear_skb is: + * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1) + * (Note that hw_mtu == sw_mtu + hard_mtu.) + * What is returned from this function is: + * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2) + * After assigning sw_mtu := max_mtu, the left side of (1) turns to + * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE, + * because both PAGE_SIZE and S are already aligned. Any number greater + * than max_mtu would make the left side of (1) greater than PAGE_SIZE, + * so max_mtu is the maximum MTU allowed. + */ + + return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr)); +} + +static inline bool +mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, + struct xdp_buff *xdp) +{ + struct mlx5e_xdp_info xdpi; + + xdpi.xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpi.xdpf)) + return false; + xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf); + dma_sync_single_for_device(sq->pdev, xdpi.dma_addr, + xdpi.xdpf->len, PCI_DMA_TODEVICE); + xdpi.di = *di; + + return mlx5e_xmit_xdp_frame(sq, &xdpi); +} + +/* returns true if packet was consumed by xdp */ +bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, + void *va, u16 *rx_headroom, u32 *len) +{ + struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); + struct xdp_buff xdp; + u32 act; + int err; + + if (!prog) + return false; + + xdp.data = va + *rx_headroom; + xdp_set_data_meta_invalid(&xdp); + xdp.data_end = xdp.data + *len; + xdp.data_hard_start = va; + xdp.rxq = &rq->xdp_rxq; + + act = bpf_prog_run_xdp(prog, &xdp); + switch (act) { + case XDP_PASS: + *rx_headroom = xdp.data - xdp.data_hard_start; + *len = xdp.data_end - xdp.data; + return false; + case XDP_TX: + if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp))) + goto xdp_abort; + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ + return true; + case XDP_REDIRECT: + /* When XDP enabled then page-refcnt==1 here */ + err = xdp_do_redirect(rq->netdev, &xdp, prog); + if (unlikely(err)) + goto xdp_abort; + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); + rq->xdpsq.redirect_flush = true; + mlx5e_page_dma_unmap(rq, di); + rq->stats->xdp_redirect++; + return true; + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: +xdp_abort: + trace_xdp_exception(rq->netdev, prog, act); + /* fall through */ + case XDP_DROP: + rq->stats->xdp_drop++; + return true; + } +} + +bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg = wqe->data; + + struct xdp_frame *xdpf = xdpi->xdpf; + dma_addr_t dma_addr = xdpi->dma_addr; + unsigned int dma_len = xdpf->len; + + struct mlx5e_xdpsq_stats *stats = sq->stats; + + prefetchw(wqe); + + if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) { + stats->err++; + return false; + } + + if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { + if (sq->doorbell) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->doorbell = false; + } + stats->full++; + return false; + } + + cseg->fm_ce_se = 0; + + /* copy the inline part if required */ + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE); + eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + dma_len -= MLX5E_XDP_MIN_INLINE; + dma_addr += MLX5E_XDP_MIN_INLINE; + dseg++; + } + + /* write the dma part */ + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); + + /* move page to reference to sq responsibility, + * and mark so it's not put back in page-cache. + */ + sq->db.xdpi[pi] = *xdpi; + sq->pc++; + + sq->doorbell = true; + + stats->xmit++; + return true; +} + +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_xdpsq *sq; + struct mlx5_cqe64 *cqe; + struct mlx5e_rq *rq; + bool is_redirect; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_xdpsq, cq); + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return false; + + is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state); + rq = container_of(sq, struct mlx5e_rq, xdpsq); + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + i = 0; + do { + u16 wqe_counter; + bool last_wqe; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci]; + + last_wqe = (sqcc == wqe_counter); + sqcc++; + + if (is_redirect) { + dma_unmap_single(sq->pdev, xdpi->dma_addr, + xdpi->xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi->xdpf); + } else { + /* Recycle RX page */ + mlx5e_page_release(rq, &xdpi->di, true); + } + } while (!last_wqe); + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + + sq->stats->cqes += i; + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_rq *rq; + bool is_redirect; + + is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state); + rq = is_redirect ? NULL : container_of(sq, struct mlx5e_rq, xdpsq); + + while (sq->cc != sq->pc) { + u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); + struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci]; + + sq->cc++; + + if (is_redirect) { + dma_unmap_single(sq->pdev, xdpi->dma_addr, + xdpi->xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi->xdpf); + } else { + /* Recycle RX page */ + mlx5e_page_release(rq, &xdpi->di, false); + } + } +} + +int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_xdpsq *sq; + int drops = 0; + int sq_num; + int i; + + /* this flag is sufficient, no need to test internal sq state */ + if (unlikely(!mlx5e_xdp_tx_is_enabled(priv))) + return -ENETDOWN; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + sq_num = smp_processor_id(); + + if (unlikely(sq_num >= priv->channels.num)) + return -ENXIO; + + sq = &priv->channels.c[sq_num]->xdpsq; + + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + struct mlx5e_xdp_info xdpi; + + xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) { + xdp_return_frame_rx_napi(xdpf); + drops++; + continue; + } + + xdpi.xdpf = xdpf; + + if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) { + dma_unmap_single(sq->pdev, xdpi.dma_addr, + xdpf->len, DMA_TO_DEVICE); + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + + if (flags & XDP_XMIT_FLUSH) + mlx5e_xmit_xdp_doorbell(sq); + + return n - drops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h new file mode 100644 index 000000000..827ceef5f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_XDP_H__ +#define __MLX5_EN_XDP_H__ + +#include "en.h" + +#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) +#define MLX5E_XDP_TX_DS_COUNT \ + ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) + +int mlx5e_xdp_max_mtu(struct mlx5e_params *params); +bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, + void *va, u16 *rx_headroom, u32 *len); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); + +bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi); +int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); + +static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); +} + +static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) +{ + clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); + /* let other device's napi(s) see our new state */ + synchronize_rcu(); +} + +static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); +} + +static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_wqe *wqe; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc - 1); /* last pi */ + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl); +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile new file mode 100644 index 000000000..d8e17110f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h new file mode 100644 index 000000000..1dd225380 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_EN_ACCEL_H__ +#define __MLX5E_EN_ACCEL_H__ + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/tls_rxtx.h" +#include "en.h" + +static inline void +mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb) +{ + int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr); + + udp_hdr(skb)->len = htons(payload_len); +} + +static inline struct sk_buff * +mlx5e_accel_handle_tx(struct sk_buff *skb, + struct mlx5e_txqsq *sq, + struct net_device *dev, + struct mlx5e_tx_wqe **wqe, + u16 *pi) +{ +#ifdef CONFIG_MLX5_EN_TLS + if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) { + skb = mlx5e_tls_handle_tx_skb(dev, sq, skb, wqe, pi); + if (unlikely(!skb)) + return NULL; + } +#endif + +#ifdef CONFIG_MLX5_EN_IPSEC + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) { + skb = mlx5e_ipsec_handle_tx_skb(dev, *wqe, skb); + if (unlikely(!skb)) + return NULL; + } +#endif + + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + mlx5e_udp_gso_handle_tx_skb(skb); + + return skb; +} + +#endif /* __MLX5E_EN_ACCEL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c new file mode 100644 index 000000000..c467f5e98 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -0,0 +1,547 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <crypto/internal/geniv.h> +#include <crypto/aead.h> +#include <linux/inetdevice.h> +#include <linux/netdevice.h> +#include <linux/module.h> + +#include "en.h" +#include "en_accel/ipsec.h" +#include "en_accel/ipsec_rxtx.h" + + +static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa; + + if (!x) + return NULL; + + sa = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; + if (!sa) + return NULL; + + WARN_ON(sa->x != x); + return sa; +} + +struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec, + unsigned int handle) +{ + struct mlx5e_ipsec_sa_entry *sa_entry; + struct xfrm_state *ret = NULL; + + rcu_read_lock(); + hash_for_each_possible_rcu(ipsec->sadb_rx, sa_entry, hlist, handle) + if (sa_entry->handle == handle) { + ret = sa_entry->x; + xfrm_state_hold(ret); + break; + } + rcu_read_unlock(); + + return ret; +} + +static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + unsigned long flags; + int ret; + + ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL); + if (ret < 0) + return ret; + + spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); + sa_entry->handle = ret; + hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle); + spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags); + + return 0; +} + +static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + unsigned long flags; + + spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); + hash_del_rcu(&sa_entry->hlist); + spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags); +} + +static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + + /* xfrm already doing sync rcu between del and free callbacks */ + + ida_simple_remove(&ipsec->halloc, sa_entry->handle); +} + +static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_replay_state_esn *replay_esn; + u32 seq_bottom; + u8 overlap; + u32 *esn; + + if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) { + sa_entry->esn_state.trigger = 0; + return false; + } + + replay_esn = sa_entry->x->replay_esn; + seq_bottom = replay_esn->seq - replay_esn->replay_window + 1; + overlap = sa_entry->esn_state.overlap; + + sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x, + htonl(seq_bottom)); + esn = &sa_entry->esn_state.esn; + + sa_entry->esn_state.trigger = 1; + if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { + ++(*esn); + sa_entry->esn_state.overlap = 0; + return true; + } else if (unlikely(!overlap && + (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) { + sa_entry->esn_state.overlap = 1; + return true; + } + + return false; +} + +static void +mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + struct xfrm_state *x = sa_entry->x; + struct aes_gcm_keymat *aes_gcm = &attrs->keymat.aes_gcm; + struct aead_geniv_ctx *geniv_ctx; + struct crypto_aead *aead; + unsigned int crypto_data_len, key_len; + int ivsize; + + memset(attrs, 0, sizeof(*attrs)); + + /* key */ + crypto_data_len = (x->aead->alg_key_len + 7) / 8; + key_len = crypto_data_len - 4; /* 4 bytes salt at end */ + + memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len); + aes_gcm->key_len = key_len * 8; + + /* salt and seq_iv */ + aead = x->data; + geniv_ctx = crypto_aead_ctx(aead); + ivsize = crypto_aead_ivsize(aead); + memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize); + memcpy(&aes_gcm->salt, x->aead->alg_key + key_len, + sizeof(aes_gcm->salt)); + + /* iv len */ + aes_gcm->icv_len = x->aead->alg_icv_len; + + /* esn */ + if (sa_entry->esn_state.trigger) { + attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; + attrs->esn = sa_entry->esn_state.esn; + if (sa_entry->esn_state.overlap) + attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + } + + /* rx handle */ + attrs->sa_handle = sa_entry->handle; + + /* algo type */ + attrs->keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; + + /* action */ + attrs->action = (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) ? + MLX5_ACCEL_ESP_ACTION_ENCRYPT : + MLX5_ACCEL_ESP_ACTION_DECRYPT; + /* flags */ + attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ? + MLX5_ACCEL_ESP_FLAGS_TRANSPORT : + MLX5_ACCEL_ESP_FLAGS_TUNNEL; +} + +static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) +{ + struct net_device *netdev = x->xso.dev; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + + if (x->props.aalgo != SADB_AALG_NONE) { + netdev_info(netdev, "Cannot offload authenticated xfrm states\n"); + return -EINVAL; + } + if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) { + netdev_info(netdev, "Only AES-GCM-ICV16 xfrm state may be offloaded\n"); + return -EINVAL; + } + if (x->props.calgo != SADB_X_CALG_NONE) { + netdev_info(netdev, "Cannot offload compressed xfrm states\n"); + return -EINVAL; + } + if (x->props.flags & XFRM_STATE_ESN && + !(mlx5_accel_ipsec_device_caps(priv->mdev) & + MLX5_ACCEL_IPSEC_CAP_ESN)) { + netdev_info(netdev, "Cannot offload ESN xfrm states\n"); + return -EINVAL; + } + if (x->props.family != AF_INET && + x->props.family != AF_INET6) { + netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n"); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_TRANSPORT && + x->props.mode != XFRM_MODE_TUNNEL) { + dev_info(&netdev->dev, "Only transport and tunnel xfrm states may be offloaded\n"); + return -EINVAL; + } + if (x->id.proto != IPPROTO_ESP) { + netdev_info(netdev, "Only ESP xfrm state may be offloaded\n"); + return -EINVAL; + } + if (x->encap) { + netdev_info(netdev, "Encapsulated xfrm state may not be offloaded\n"); + return -EINVAL; + } + if (!x->aead) { + netdev_info(netdev, "Cannot offload xfrm states without aead\n"); + return -EINVAL; + } + if (x->aead->alg_icv_len != 128) { + netdev_info(netdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n"); + return -EINVAL; + } + if ((x->aead->alg_key_len != 128 + 32) && + (x->aead->alg_key_len != 256 + 32)) { + netdev_info(netdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); + return -EINVAL; + } + if (x->tfcpad) { + netdev_info(netdev, "Cannot offload xfrm states with tfc padding\n"); + return -EINVAL; + } + if (!x->geniv) { + netdev_info(netdev, "Cannot offload xfrm states without geniv\n"); + return -EINVAL; + } + if (strcmp(x->geniv, "seqiv")) { + netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n"); + return -EINVAL; + } + if (x->props.family == AF_INET6 && + !(mlx5_accel_ipsec_device_caps(priv->mdev) & + MLX5_ACCEL_IPSEC_CAP_IPV6)) { + netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n"); + return -EINVAL; + } + return 0; +} + +static int mlx5e_xfrm_add_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = NULL; + struct net_device *netdev = x->xso.dev; + struct mlx5_accel_esp_xfrm_attrs attrs; + struct mlx5e_priv *priv; + __be32 saddr[4] = {0}, daddr[4] = {0}, spi; + bool is_ipv6 = false; + int err; + + priv = netdev_priv(netdev); + + err = mlx5e_xfrm_validate_state(x); + if (err) + return err; + + sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); + if (!sa_entry) { + err = -ENOMEM; + goto out; + } + + sa_entry->x = x; + sa_entry->ipsec = priv->ipsec; + + /* Add the SA to handle processed incoming packets before the add SA + * completion was received + */ + if (x->xso.flags & XFRM_OFFLOAD_INBOUND) { + err = mlx5e_ipsec_sadb_rx_add(sa_entry); + if (err) { + netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err); + goto err_entry; + } + } else { + sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ? + mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv; + } + + /* check esn */ + mlx5e_ipsec_update_esn_state(sa_entry); + + /* create xfrm */ + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs); + sa_entry->xfrm = + mlx5_accel_esp_create_xfrm(priv->mdev, &attrs, + MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA); + if (IS_ERR(sa_entry->xfrm)) { + err = PTR_ERR(sa_entry->xfrm); + goto err_sadb_rx; + } + + /* create hw context */ + if (x->props.family == AF_INET) { + saddr[3] = x->props.saddr.a4; + daddr[3] = x->id.daddr.a4; + } else { + memcpy(saddr, x->props.saddr.a6, sizeof(saddr)); + memcpy(daddr, x->id.daddr.a6, sizeof(daddr)); + is_ipv6 = true; + } + spi = x->id.spi; + sa_entry->hw_context = + mlx5_accel_esp_create_hw_context(priv->mdev, + sa_entry->xfrm, + saddr, daddr, spi, + is_ipv6); + if (IS_ERR(sa_entry->hw_context)) { + err = PTR_ERR(sa_entry->hw_context); + goto err_xfrm; + } + + x->xso.offload_handle = (unsigned long)sa_entry; + goto out; + +err_xfrm: + mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm); +err_sadb_rx: + if (x->xso.flags & XFRM_OFFLOAD_INBOUND) { + mlx5e_ipsec_sadb_rx_del(sa_entry); + mlx5e_ipsec_sadb_rx_free(sa_entry); + } +err_entry: + kfree(sa_entry); +out: + return err; +} + +static void mlx5e_xfrm_del_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + + if (!sa_entry) + return; + + if (x->xso.flags & XFRM_OFFLOAD_INBOUND) + mlx5e_ipsec_sadb_rx_del(sa_entry); +} + +static void mlx5e_xfrm_free_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + + if (!sa_entry) + return; + + if (sa_entry->hw_context) { + flush_workqueue(sa_entry->ipsec->wq); + mlx5_accel_esp_free_hw_context(sa_entry->hw_context); + mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm); + } + + if (x->xso.flags & XFRM_OFFLOAD_INBOUND) + mlx5e_ipsec_sadb_rx_free(sa_entry); + + kfree(sa_entry); +} + +int mlx5e_ipsec_init(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec *ipsec = NULL; + + if (!MLX5_IPSEC_DEV(priv->mdev)) { + netdev_dbg(priv->netdev, "Not an IPSec offload device\n"); + return 0; + } + + ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL); + if (!ipsec) + return -ENOMEM; + + hash_init(ipsec->sadb_rx); + spin_lock_init(&ipsec->sadb_rx_lock); + ida_init(&ipsec->halloc); + ipsec->en_priv = priv; + ipsec->en_priv->ipsec = ipsec; + ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) & + MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER); + ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0, + priv->netdev->name); + if (!ipsec->wq) { + kfree(ipsec); + return -ENOMEM; + } + netdev_dbg(priv->netdev, "IPSec attached to netdevice\n"); + return 0; +} + +void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec *ipsec = priv->ipsec; + + if (!ipsec) + return; + + drain_workqueue(ipsec->wq); + destroy_workqueue(ipsec->wq); + + ida_destroy(&ipsec->halloc); + kfree(ipsec); + priv->ipsec = NULL; +} + +static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + if (x->props.family == AF_INET) { + /* Offload with IPv4 options is not supported yet */ + if (ip_hdr(skb)->ihl > 5) + return false; + } else { + /* Offload with IPv6 extension headers is not support yet */ + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) + return false; + } + + return true; +} + +struct mlx5e_ipsec_modify_state_work { + struct work_struct work; + struct mlx5_accel_esp_xfrm_attrs attrs; + struct mlx5e_ipsec_sa_entry *sa_entry; +}; + +static void _update_xfrm_state(struct work_struct *work) +{ + int ret; + struct mlx5e_ipsec_modify_state_work *modify_work = + container_of(work, struct mlx5e_ipsec_modify_state_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = modify_work->sa_entry; + + ret = mlx5_accel_esp_modify_xfrm(sa_entry->xfrm, + &modify_work->attrs); + if (ret) + netdev_warn(sa_entry->ipsec->en_priv->netdev, + "Not an IPSec offload device\n"); + + kfree(modify_work); +} + +static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + struct mlx5e_ipsec_modify_state_work *modify_work; + bool need_update; + + if (!sa_entry) + return; + + need_update = mlx5e_ipsec_update_esn_state(sa_entry); + if (!need_update) + return; + + modify_work = kzalloc(sizeof(*modify_work), GFP_ATOMIC); + if (!modify_work) + return; + + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs); + modify_work->sa_entry = sa_entry; + + INIT_WORK(&modify_work->work, _update_xfrm_state); + WARN_ON(!queue_work(sa_entry->ipsec->wq, &modify_work->work)); +} + +static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { + .xdo_dev_state_add = mlx5e_xfrm_add_state, + .xdo_dev_state_delete = mlx5e_xfrm_del_state, + .xdo_dev_state_free = mlx5e_xfrm_free_state, + .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, + .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, +}; + +void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *netdev = priv->netdev; + + if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) || + !MLX5_CAP_ETH(mdev, swp)) { + mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n"); + return; + } + + mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n"); + netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; + netdev->features |= NETIF_F_HW_ESP; + netdev->hw_enc_features |= NETIF_F_HW_ESP; + + if (!MLX5_CAP_ETH(mdev, swp_csum)) { + mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n"); + return; + } + + netdev->features |= NETIF_F_HW_ESP_TX_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM; + + if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) || + !MLX5_CAP_ETH(mdev, swp_lso)) { + mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n"); + return; + } + + mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n"); + netdev->features |= NETIF_F_GSO_ESP; + netdev->hw_features |= NETIF_F_GSO_ESP; + netdev->hw_enc_features |= NETIF_F_GSO_ESP; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h new file mode 100644 index 000000000..93bf10e65 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_IPSEC_H__ +#define __MLX5E_IPSEC_H__ + +#ifdef CONFIG_MLX5_EN_IPSEC + +#include <linux/mlx5/device.h> +#include <net/xfrm.h> +#include <linux/idr.h> + +#include "accel/ipsec.h" + +#define MLX5E_IPSEC_SADB_RX_BITS 10 +#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L + +struct mlx5e_priv; + +struct mlx5e_ipsec_sw_stats { + atomic64_t ipsec_rx_drop_sp_alloc; + atomic64_t ipsec_rx_drop_sadb_miss; + atomic64_t ipsec_rx_drop_syndrome; + atomic64_t ipsec_tx_drop_bundle; + atomic64_t ipsec_tx_drop_no_state; + atomic64_t ipsec_tx_drop_not_ip; + atomic64_t ipsec_tx_drop_trailer; + atomic64_t ipsec_tx_drop_metadata; +}; + +struct mlx5e_ipsec_stats { + u64 ipsec_dec_in_packets; + u64 ipsec_dec_out_packets; + u64 ipsec_dec_bypass_packets; + u64 ipsec_enc_in_packets; + u64 ipsec_enc_out_packets; + u64 ipsec_enc_bypass_packets; + u64 ipsec_dec_drop_packets; + u64 ipsec_dec_auth_fail_packets; + u64 ipsec_enc_drop_packets; + u64 ipsec_add_sa_success; + u64 ipsec_add_sa_fail; + u64 ipsec_del_sa_success; + u64 ipsec_del_sa_fail; + u64 ipsec_cmd_drop; +}; + +struct mlx5e_ipsec { + struct mlx5e_priv *en_priv; + DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS); + bool no_trailer; + spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */ + struct ida halloc; + struct mlx5e_ipsec_sw_stats sw_stats; + struct mlx5e_ipsec_stats stats; + struct workqueue_struct *wq; +}; + +struct mlx5e_ipsec_esn_state { + u32 esn; + u8 trigger: 1; + u8 overlap: 1; +}; + +struct mlx5e_ipsec_sa_entry { + struct hlist_node hlist; /* Item in SADB_RX hashtable */ + struct mlx5e_ipsec_esn_state esn_state; + unsigned int handle; /* Handle in SADB_RX */ + struct xfrm_state *x; + struct mlx5e_ipsec *ipsec; + struct mlx5_accel_esp_xfrm *xfrm; + void *hw_context; + void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo); +}; + +void mlx5e_ipsec_build_inverse_table(void); +int mlx5e_ipsec_init(struct mlx5e_priv *priv); +void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv); +void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv); + +int mlx5e_ipsec_get_count(struct mlx5e_priv *priv); +int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data); +void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv); +int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data); + +struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev, + unsigned int handle); + +#else + +static inline void mlx5e_ipsec_build_inverse_table(void) +{ +} + +static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) +{ +} + +static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) +{ +} + +static inline int mlx5e_ipsec_get_count(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, + uint8_t *data) +{ + return 0; +} + +static inline void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv) +{ +} + +static inline int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + return 0; +} + +#endif + +#endif /* __MLX5E_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c new file mode 100644 index 000000000..128a82b1d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <crypto/aead.h> +#include <net/xfrm.h> +#include <net/esp.h> + +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/ipsec.h" +#include "accel/accel.h" +#include "en.h" + +enum { + MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11, + MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12, + MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17, +}; + +struct mlx5e_ipsec_rx_metadata { + unsigned char nexthdr; + __be32 sa_handle; +} __packed; + +enum { + MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8, + MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9, +}; + +struct mlx5e_ipsec_tx_metadata { + __be16 mss_inv; /* 1/MSS in 16bit fixed point, only for LSO */ + __be16 seq; /* LSBs of the first TCP seq, only for LSO */ + u8 esp_next_proto; /* Next protocol of ESP */ +} __packed; + +struct mlx5e_ipsec_metadata { + unsigned char syndrome; + union { + unsigned char raw[5]; + /* from FPGA to host, on successful decrypt */ + struct mlx5e_ipsec_rx_metadata rx; + /* from host to FPGA */ + struct mlx5e_ipsec_tx_metadata tx; + } __packed content; + /* packet type ID field */ + __be16 ethertype; +} __packed; + +#define MAX_LSO_MSS 2048 + +/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */ +static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS]; + +static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb) +{ + return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size]; +} + +static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb) +{ + struct mlx5e_ipsec_metadata *mdata; + struct ethhdr *eth; + + if (unlikely(skb_cow_head(skb, sizeof(*mdata)))) + return ERR_PTR(-ENOMEM); + + eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata)); + skb->mac_header -= sizeof(*mdata); + mdata = (struct mlx5e_ipsec_metadata *)(eth + 1); + + memmove(skb->data, skb->data + sizeof(*mdata), + 2 * ETH_ALEN); + + eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE); + + memset(mdata->content.raw, 0, sizeof(mdata->content.raw)); + return mdata; +} + +static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) +{ + unsigned int alen = crypto_aead_authsize(x->data); + struct ipv6hdr *ipv6hdr = ipv6_hdr(skb); + struct iphdr *ipv4hdr = ip_hdr(skb); + unsigned int trailer_len; + u8 plen; + int ret; + + ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1); + if (unlikely(ret)) + return ret; + + trailer_len = alen + plen + 2; + + pskb_trim(skb, skb->len - trailer_len); + if (skb->protocol == htons(ETH_P_IP)) { + ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len); + ip_send_check(ipv4hdr); + } else { + ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) - + trailer_len); + } + return 0; +} + +static void mlx5e_ipsec_set_swp(struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, u8 mode, + struct xfrm_offload *xo) +{ + u8 proto; + + /* Tunnel Mode: + * SWP: OutL3 InL3 InL4 + * Pkt: MAC IP ESP IP L4 + * + * Transport Mode: + * SWP: OutL3 InL4 + * InL3 + * Pkt: MAC IP ESP L4 + * + * Offsets are in 2-byte words, counting from start of frame + */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (skb->protocol == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + + if (mode == XFRM_MODE_TUNNEL) { + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + if (xo->proto == IPPROTO_IPV6) { + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + proto = inner_ipv6_hdr(skb)->nexthdr; + } else { + proto = inner_ip_hdr(skb)->protocol; + } + } else { + eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; + if (skb->protocol == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + proto = xo->proto; + } + switch (proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + /* Fall through */ + case IPPROTO_TCP: + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + } +} + +void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo) +{ + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + __u32 oseq = replay_esn->oseq; + int iv_offset; + __be64 seqno; + u32 seq_hi; + + if (unlikely(skb_is_gso(skb) && oseq < MLX5E_IPSEC_ESN_SCOPE_MID && + MLX5E_IPSEC_ESN_SCOPE_MID < (oseq - skb_shinfo(skb)->gso_segs))) { + seq_hi = xo->seq.hi - 1; + } else { + seq_hi = xo->seq.hi; + } + + /* Place the SN in the IV field */ + seqno = cpu_to_be64(xo->seq.low + ((u64)seq_hi << 32)); + iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr); + skb_store_bits(skb, iv_offset, &seqno, 8); +} + +void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo) +{ + int iv_offset; + __be64 seqno; + + /* Place the SN in the IV field */ + seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); + iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr); + skb_store_bits(skb, iv_offset, &seqno, 8); +} + +static void mlx5e_ipsec_set_metadata(struct sk_buff *skb, + struct mlx5e_ipsec_metadata *mdata, + struct xfrm_offload *xo) +{ + struct ip_esp_hdr *esph; + struct tcphdr *tcph; + + if (skb_is_gso(skb)) { + /* Add LSO metadata indication */ + esph = ip_esp_hdr(skb); + tcph = inner_tcp_hdr(skb); + netdev_dbg(skb->dev, " Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n", + skb->network_header, + skb->transport_header, + skb->inner_network_header, + skb->inner_transport_header); + netdev_dbg(skb->dev, " Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n", + skb->len, skb_shinfo(skb)->gso_size, + ntohs(tcph->source), ntohs(tcph->dest), + ntohl(tcph->seq), ntohl(esph->seq_no)); + mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP; + mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb); + mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF); + } else { + mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD; + } + mdata->content.tx.esp_next_proto = xo->proto; + + netdev_dbg(skb->dev, " TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n", + mdata->syndrome, mdata->content.tx.esp_next_proto, + ntohs(mdata->content.tx.mss_inv), + ntohs(mdata->content.tx.seq)); +} + +struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct mlx5e_tx_wqe *wqe, + struct sk_buff *skb) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct xfrm_offload *xo = xfrm_offload(skb); + struct mlx5e_ipsec_metadata *mdata; + struct mlx5e_ipsec_sa_entry *sa_entry; + struct xfrm_state *x; + + if (!xo) + return skb; + + if (unlikely(skb->sp->len != 1)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle); + goto drop; + } + + x = xfrm_input_state(skb); + if (unlikely(!x)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_no_state); + goto drop; + } + + if (unlikely(!x->xso.offload_handle || + (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)))) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip); + goto drop; + } + + if (!skb_is_gso(skb)) + if (unlikely(mlx5e_ipsec_remove_trailer(skb, x))) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer); + goto drop; + } + mdata = mlx5e_ipsec_add_metadata(skb); + if (IS_ERR(mdata)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); + goto drop; + } + mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo); + sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; + sa_entry->set_iv_op(skb, x, xo); + mlx5e_ipsec_set_metadata(skb, mdata, xo); + + return skb; + +drop: + kfree_skb(skb); + return NULL; +} + +static inline struct xfrm_state * +mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, + struct mlx5e_ipsec_metadata *mdata) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct xfrm_offload *xo; + struct xfrm_state *xs; + u32 sa_handle; + + skb->sp = secpath_dup(skb->sp); + if (unlikely(!skb->sp)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc); + return NULL; + } + + sa_handle = be32_to_cpu(mdata->content.rx.sa_handle); + xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle); + if (unlikely(!xs)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss); + return NULL; + } + + skb->sp->xvec[skb->sp->len++] = xs; + skb->sp->olen++; + + xo = xfrm_offload(skb); + xo->flags = CRYPTO_DONE; + switch (mdata->syndrome) { + case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED: + xo->status = CRYPTO_SUCCESS; + if (likely(priv->ipsec->no_trailer)) { + xo->flags |= XFRM_ESP_NO_TRAILER; + xo->proto = mdata->content.rx.nexthdr; + } + break; + case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED: + xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED; + break; + case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO: + xo->status = CRYPTO_INVALID_PROTOCOL; + break; + default: + atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome); + return NULL; + } + return xs; +} + +struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, u32 *cqe_bcnt) +{ + struct mlx5e_ipsec_metadata *mdata; + struct xfrm_state *xs; + + if (!is_metadata_hdr_valid(skb)) + return skb; + + /* Use the metadata */ + mdata = (struct mlx5e_ipsec_metadata *)(skb->data + ETH_HLEN); + xs = mlx5e_ipsec_build_sp(netdev, skb, mdata); + if (unlikely(!xs)) { + kfree_skb(skb); + return NULL; + } + + remove_metadata_hdr(skb); + *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN; + + return skb; +} + +bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, + netdev_features_t features) +{ + struct xfrm_state *x; + + if (skb->sp && skb->sp->len) { + x = skb->sp->xvec[0]; + if (x && x->xso.offload_handle) + return true; + } + return false; +} + +void mlx5e_ipsec_build_inverse_table(void) +{ + u16 mss_inv; + u32 mss; + + /* Calculate 1/x inverse table for use in GSO data path. + * Using this table, we provide the IPSec accelerator with the value of + * 1/gso_size so that it can infer the position of each segment inside + * the GSO, and increment the ESP sequence number, and generate the IV. + * The HW needs this value in Q0.16 fixed-point number format + */ + mlx5e_ipsec_inverse_table[1] = htons(0xFFFF); + for (mss = 2; mss < MAX_LSO_MSS; mss++) { + mss_inv = div_u64(1ULL << 32, mss) >> 16; + mlx5e_ipsec_inverse_table[mss] = htons(mss_inv); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h new file mode 100644 index 000000000..ca47c0540 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_IPSEC_RXTX_H__ +#define __MLX5E_IPSEC_RXTX_H__ + +#ifdef CONFIG_MLX5_EN_IPSEC + +#include <linux/skbuff.h> +#include <net/xfrm.h> +#include "en.h" + +struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, u32 *cqe_bcnt); +void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +void mlx5e_ipsec_inverse_table_init(void); +bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, + netdev_features_t features); +void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo); +void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo); +struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct mlx5e_tx_wqe *wqe, + struct sk_buff *skb); + +#endif /* CONFIG_MLX5_EN_IPSEC */ + +#endif /* __MLX5E_IPSEC_RXTX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c new file mode 100644 index 000000000..6fea59223 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/ethtool.h> +#include <net/sock.h> + +#include "en.h" +#include "accel/ipsec.h" +#include "fpga/sdk.h" +#include "en_accel/ipsec.h" + +static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) }, +}; + +static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) }, +}; + +#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ + atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) + +#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc) +#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc) + +#define NUM_IPSEC_COUNTERS (NUM_IPSEC_HW_COUNTERS + NUM_IPSEC_SW_COUNTERS) + +int mlx5e_ipsec_get_count(struct mlx5e_priv *priv) +{ + if (!priv->ipsec) + return 0; + + return NUM_IPSEC_COUNTERS; +} + +int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + unsigned int i, idx = 0; + + if (!priv->ipsec) + return 0; + + for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_ipsec_hw_stats_desc[i].format); + + for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_ipsec_sw_stats_desc[i].format); + + return NUM_IPSEC_COUNTERS; +} + +void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv) +{ + int ret; + + if (!priv->ipsec) + return; + + ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats, + NUM_IPSEC_HW_COUNTERS); + if (ret) + memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats)); +} + +int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + int i, idx = 0; + + if (!priv->ipsec) + return 0; + + for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats, + mlx5e_ipsec_hw_stats_desc, i); + + for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats, + mlx5e_ipsec_sw_stats_desc, i); + + return NUM_IPSEC_COUNTERS; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c new file mode 100644 index 000000000..e88340e19 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/netdevice.h> +#include <net/ipv6.h> +#include "en_accel/tls.h" +#include "accel/tls.h" + +static void mlx5e_tls_set_ipv4_flow(void *flow, struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + + MLX5_SET(tls_flow, flow, ipv6, 0); + memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &inet->inet_daddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4)); + memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv4_layout.ipv4), + &inet->inet_rcv_saddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4)); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void mlx5e_tls_set_ipv6_flow(void *flow, struct sock *sk) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + MLX5_SET(tls_flow, flow, ipv6, 1); + memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &sk->sk_v6_daddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv6_layout.ipv6), + &np->saddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); +} +#endif + +static void mlx5e_tls_set_flow_tcp_ports(void *flow, struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + + memcpy(MLX5_ADDR_OF(tls_flow, flow, src_port), &inet->inet_sport, + MLX5_FLD_SZ_BYTES(tls_flow, src_port)); + memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_port), &inet->inet_dport, + MLX5_FLD_SZ_BYTES(tls_flow, dst_port)); +} + +static int mlx5e_tls_set_flow(void *flow, struct sock *sk, u32 caps) +{ + switch (sk->sk_family) { + case AF_INET: + mlx5e_tls_set_ipv4_flow(flow, sk); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + if (!sk->sk_ipv6only && + ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) { + mlx5e_tls_set_ipv4_flow(flow, sk); + break; + } + if (!(caps & MLX5_ACCEL_TLS_IPV6)) + goto error_out; + + mlx5e_tls_set_ipv6_flow(flow, sk); + break; +#endif + default: + goto error_out; + } + + mlx5e_tls_set_flow_tcp_ports(flow, sk); + return 0; +error_out: + return -EINVAL; +} + +static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct mlx5_core_dev *mdev = priv->mdev; + u32 caps = mlx5_accel_tls_device_caps(mdev); + int ret = -ENOMEM; + void *flow; + u32 swid; + + flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL); + if (!flow) + return ret; + + ret = mlx5e_tls_set_flow(flow, sk, caps); + if (ret) + goto free_flow; + + ret = mlx5_accel_tls_add_flow(mdev, flow, crypto_info, + start_offload_tcp_sn, &swid, + direction == TLS_OFFLOAD_CTX_DIR_TX); + if (ret < 0) + goto free_flow; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + struct mlx5e_tls_offload_context_tx *tx_ctx = + mlx5e_get_tls_tx_context(tls_ctx); + + tx_ctx->swid = htonl(swid); + tx_ctx->expected_seq = start_offload_tcp_sn; + } else { + struct mlx5e_tls_offload_context_rx *rx_ctx = + mlx5e_get_tls_rx_context(tls_ctx); + + rx_ctx->handle = htonl(swid); + } + + return 0; +free_flow: + kfree(flow); + return ret; +} + +static void mlx5e_tls_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + unsigned int handle; + + handle = ntohl((direction == TLS_OFFLOAD_CTX_DIR_TX) ? + mlx5e_get_tls_tx_context(tls_ctx)->swid : + mlx5e_get_tls_rx_context(tls_ctx)->handle); + + mlx5_accel_tls_del_flow(priv->mdev, handle, + direction == TLS_OFFLOAD_CTX_DIR_TX); +} + +static void mlx5e_tls_resync_rx(struct net_device *netdev, struct sock *sk, + u32 seq, u64 rcd_sn) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_tls_offload_context_rx *rx_ctx; + + rx_ctx = mlx5e_get_tls_rx_context(tls_ctx); + + netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq, + be64_to_cpu(rcd_sn)); + mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn); + atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply); +} + +static const struct tlsdev_ops mlx5e_tls_ops = { + .tls_dev_add = mlx5e_tls_add, + .tls_dev_del = mlx5e_tls_del, + .tls_dev_resync_rx = mlx5e_tls_resync_rx, +}; + +void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + u32 caps; + + if (!mlx5_accel_is_tls_device(priv->mdev)) + return; + + caps = mlx5_accel_tls_device_caps(priv->mdev); + if (caps & MLX5_ACCEL_TLS_TX) { + netdev->features |= NETIF_F_HW_TLS_TX; + netdev->hw_features |= NETIF_F_HW_TLS_TX; + } + + if (caps & MLX5_ACCEL_TLS_RX) { + netdev->features |= NETIF_F_HW_TLS_RX; + netdev->hw_features |= NETIF_F_HW_TLS_RX; + } + + if (!(caps & MLX5_ACCEL_TLS_LRO)) { + netdev->features &= ~NETIF_F_LRO; + netdev->hw_features &= ~NETIF_F_LRO; + } + + netdev->tlsdev_ops = &mlx5e_tls_ops; +} + +int mlx5e_tls_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *tls = kzalloc(sizeof(*tls), GFP_KERNEL); + + if (!tls) + return -ENOMEM; + + priv->tls = tls; + return 0; +} + +void mlx5e_tls_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *tls = priv->tls; + + if (!tls) + return; + + kfree(tls); + priv->tls = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h new file mode 100644 index 000000000..3f5d72163 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#ifndef __MLX5E_TLS_H__ +#define __MLX5E_TLS_H__ + +#ifdef CONFIG_MLX5_EN_TLS + +#include <net/tls.h> +#include "en.h" + +struct mlx5e_tls_sw_stats { + atomic64_t tx_tls_drop_metadata; + atomic64_t tx_tls_drop_resync_alloc; + atomic64_t tx_tls_drop_no_sync_data; + atomic64_t tx_tls_drop_bypass_required; + atomic64_t rx_tls_drop_resync_request; + atomic64_t rx_tls_resync_request; + atomic64_t rx_tls_resync_reply; + atomic64_t rx_tls_auth_fail; +}; + +struct mlx5e_tls { + struct mlx5e_tls_sw_stats sw_stats; +}; + +struct mlx5e_tls_offload_context_tx { + struct tls_offload_context_tx base; + u32 expected_seq; + __be32 swid; +}; + +static inline struct mlx5e_tls_offload_context_tx * +mlx5e_get_tls_tx_context(struct tls_context *tls_ctx) +{ + BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_tx) > + TLS_OFFLOAD_CONTEXT_SIZE_TX); + return container_of(tls_offload_ctx_tx(tls_ctx), + struct mlx5e_tls_offload_context_tx, + base); +} + +struct mlx5e_tls_offload_context_rx { + struct tls_offload_context_rx base; + __be32 handle; +}; + +static inline struct mlx5e_tls_offload_context_rx * +mlx5e_get_tls_rx_context(struct tls_context *tls_ctx) +{ + BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_rx) > + TLS_OFFLOAD_CONTEXT_SIZE_RX); + return container_of(tls_offload_ctx_rx(tls_ctx), + struct mlx5e_tls_offload_context_rx, + base); +} + +void mlx5e_tls_build_netdev(struct mlx5e_priv *priv); +int mlx5e_tls_init(struct mlx5e_priv *priv); +void mlx5e_tls_cleanup(struct mlx5e_priv *priv); + +int mlx5e_tls_get_count(struct mlx5e_priv *priv); +int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data); +int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data); + +#else + +static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { } +static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { } +static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; } +static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; } +static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; } + +#endif + +#endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c new file mode 100644 index 000000000..be137d4a9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include "en_accel/tls.h" +#include "en_accel/tls_rxtx.h" +#include "accel/accel.h" + +#include <net/inet6_hashtables.h> +#include <linux/ipv6.h> + +#define SYNDROM_DECRYPTED 0x30 +#define SYNDROM_RESYNC_REQUEST 0x31 +#define SYNDROM_AUTH_FAILED 0x32 + +#define SYNDROME_OFFLOAD_REQUIRED 32 +#define SYNDROME_SYNC 33 + +struct sync_info { + u64 rcd_sn; + s32 sync_len; + int nr_frags; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +struct recv_metadata_content { + u8 syndrome; + u8 reserved; + __be32 sync_seq; +} __packed; + +struct send_metadata_content { + /* One byte of syndrome followed by 3 bytes of swid */ + __be32 syndrome_swid; + __be16 first_seq; +} __packed; + +struct mlx5e_tls_metadata { + union { + /* from fpga to host */ + struct recv_metadata_content recv; + /* from host to fpga */ + struct send_metadata_content send; + unsigned char raw[6]; + } __packed content; + /* packet type ID field */ + __be16 ethertype; +} __packed; + +static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid) +{ + struct mlx5e_tls_metadata *pet; + struct ethhdr *eth; + + if (skb_cow_head(skb, sizeof(struct mlx5e_tls_metadata))) + return -ENOMEM; + + eth = (struct ethhdr *)skb_push(skb, sizeof(struct mlx5e_tls_metadata)); + skb->mac_header -= sizeof(struct mlx5e_tls_metadata); + pet = (struct mlx5e_tls_metadata *)(eth + 1); + + memmove(skb->data, skb->data + sizeof(struct mlx5e_tls_metadata), + 2 * ETH_ALEN); + + eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE); + pet->content.send.syndrome_swid = + htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid; + + return 0; +} + +static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context_tx *context, + u32 tcp_seq, struct sync_info *info) +{ + int remaining, i = 0, ret = -EINVAL; + struct tls_record_info *record; + unsigned long flags; + s32 sync_size; + + spin_lock_irqsave(&context->base.lock, flags); + record = tls_get_record(&context->base, tcp_seq, &info->rcd_sn); + + if (unlikely(!record)) + goto out; + + sync_size = tcp_seq - tls_record_start_seq(record); + info->sync_len = sync_size; + if (unlikely(sync_size < 0)) { + if (tls_record_is_start_marker(record)) + goto done; + + goto out; + } + + remaining = sync_size; + while (remaining > 0) { + info->frags[i] = record->frags[i]; + __skb_frag_ref(&info->frags[i]); + remaining -= skb_frag_size(&info->frags[i]); + + if (remaining < 0) + skb_frag_size_add(&info->frags[i], remaining); + + i++; + } + info->nr_frags = i; +done: + ret = 0; +out: + spin_unlock_irqrestore(&context->base.lock, flags); + return ret; +} + +static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb, + struct sk_buff *nskb, u32 tcp_seq, + int headln, __be64 rcd_sn) +{ + struct mlx5e_tls_metadata *pet; + u8 syndrome = SYNDROME_SYNC; + struct iphdr *iph; + struct tcphdr *th; + int data_len, mss; + + nskb->dev = skb->dev; + skb_reset_mac_header(nskb); + skb_set_network_header(nskb, skb_network_offset(skb)); + skb_set_transport_header(nskb, skb_transport_offset(skb)); + memcpy(nskb->data, skb->data, headln); + memcpy(nskb->data + headln, &rcd_sn, sizeof(rcd_sn)); + + iph = ip_hdr(nskb); + iph->tot_len = htons(nskb->len - skb_network_offset(nskb)); + th = tcp_hdr(nskb); + data_len = nskb->len - headln; + tcp_seq -= data_len; + th->seq = htonl(tcp_seq); + + mss = nskb->dev->mtu - (headln - skb_network_offset(nskb)); + skb_shinfo(nskb)->gso_size = 0; + if (data_len > mss) { + skb_shinfo(nskb)->gso_size = mss; + skb_shinfo(nskb)->gso_segs = DIV_ROUND_UP(data_len, mss); + } + skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; + + pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr)); + memcpy(pet, &syndrome, sizeof(syndrome)); + pet->content.send.first_seq = htons(tcp_seq); + + /* MLX5 devices don't care about the checksum partial start, offset + * and pseudo header + */ + nskb->ip_summed = CHECKSUM_PARTIAL; + + nskb->xmit_more = 1; + nskb->queue_mapping = skb->queue_mapping; +} + +static struct sk_buff * +mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, + struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_tx_wqe **wqe, + u16 *pi, + struct mlx5e_tls *tls) +{ + u32 tcp_seq = ntohl(tcp_hdr(skb)->seq); + struct sync_info info; + struct sk_buff *nskb; + int linear_len = 0; + int headln; + int i; + + sq->stats->tls_ooo++; + + if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) { + /* We might get here if a retransmission reaches the driver + * after the relevant record is acked. + * It should be safe to drop the packet in this case + */ + atomic64_inc(&tls->sw_stats.tx_tls_drop_no_sync_data); + goto err_out; + } + + if (unlikely(info.sync_len < 0)) { + u32 payload; + + headln = skb_transport_offset(skb) + tcp_hdrlen(skb); + payload = skb->len - headln; + if (likely(payload <= -info.sync_len)) + /* SKB payload doesn't require offload + */ + return skb; + + atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required); + goto err_out; + } + + if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) { + atomic64_inc(&tls->sw_stats.tx_tls_drop_metadata); + goto err_out; + } + + headln = skb_transport_offset(skb) + tcp_hdrlen(skb); + linear_len += headln + sizeof(info.rcd_sn); + nskb = alloc_skb(linear_len, GFP_ATOMIC); + if (unlikely(!nskb)) { + atomic64_inc(&tls->sw_stats.tx_tls_drop_resync_alloc); + goto err_out; + } + + context->expected_seq = tcp_seq + skb->len - headln; + skb_put(nskb, linear_len); + for (i = 0; i < info.nr_frags; i++) + skb_shinfo(nskb)->frags[i] = info.frags[i]; + + skb_shinfo(nskb)->nr_frags = info.nr_frags; + nskb->data_len = info.sync_len; + nskb->len += info.sync_len; + sq->stats->tls_resync_bytes += nskb->len; + mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln, + cpu_to_be64(info.rcd_sn)); + mlx5e_sq_xmit(sq, nskb, *wqe, *pi); + mlx5e_sq_fetch_wqe(sq, wqe, pi); + return skb; + +err_out: + dev_kfree_skb_any(skb); + return NULL; +} + +struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_tx_wqe **wqe, + u16 *pi) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_tls_offload_context_tx *context; + struct tls_context *tls_ctx; + u32 expected_seq; + int datalen; + u32 skb_seq; + + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + goto out; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + if (!datalen) + goto out; + + tls_ctx = tls_get_ctx(skb->sk); + if (unlikely(tls_ctx->netdev != netdev)) + goto out; + + skb_seq = ntohl(tcp_hdr(skb)->seq); + context = mlx5e_get_tls_tx_context(tls_ctx); + expected_seq = context->expected_seq; + + if (unlikely(expected_seq != skb_seq)) { + skb = mlx5e_tls_handle_ooo(context, sq, skb, wqe, pi, priv->tls); + goto out; + } + + if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) { + atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata); + dev_kfree_skb_any(skb); + skb = NULL; + goto out; + } + + context->expected_seq = skb_seq + datalen; +out: + return skb; +} + +static int tls_update_resync_sn(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5e_tls_metadata *mdata) +{ + struct sock *sk = NULL; + struct iphdr *iph; + struct tcphdr *th; + __be32 seq; + + if (mdata->ethertype != htons(ETH_P_IP)) + return -EINVAL; + + iph = (struct iphdr *)(mdata + 1); + + th = ((void *)iph) + iph->ihl * 4; + + if (iph->version == 4) { + sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo, + iph->saddr, th->source, iph->daddr, + th->dest, netdev->ifindex); +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph; + + sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo, + &ipv6h->saddr, th->source, + &ipv6h->daddr, ntohs(th->dest), + netdev->ifindex, 0); +#endif + } + if (!sk || sk->sk_state == TCP_TIME_WAIT) { + struct mlx5e_priv *priv = netdev_priv(netdev); + + atomic64_inc(&priv->tls->sw_stats.rx_tls_drop_resync_request); + goto out; + } + + skb->sk = sk; + skb->destructor = sock_edemux; + + memcpy(&seq, &mdata->content.recv.sync_seq, sizeof(seq)); + tls_offload_rx_resync_request(sk, seq); +out: + return 0; +} + +void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, + u32 *cqe_bcnt) +{ + struct mlx5e_tls_metadata *mdata; + struct mlx5e_priv *priv; + + if (!is_metadata_hdr_valid(skb)) + return; + + /* Use the metadata */ + mdata = (struct mlx5e_tls_metadata *)(skb->data + ETH_HLEN); + switch (mdata->content.recv.syndrome) { + case SYNDROM_DECRYPTED: + skb->decrypted = 1; + break; + case SYNDROM_RESYNC_REQUEST: + tls_update_resync_sn(netdev, skb, mdata); + priv = netdev_priv(netdev); + atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_request); + break; + case SYNDROM_AUTH_FAILED: + /* Authentication failure will be observed and verified by kTLS */ + priv = netdev_priv(netdev); + atomic64_inc(&priv->tls->sw_stats.rx_tls_auth_fail); + break; + default: + /* Bypass the metadata header to others */ + return; + } + + remove_metadata_hdr(skb); + *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h new file mode 100644 index 000000000..311667ec7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_TLS_RXTX_H__ +#define __MLX5E_TLS_RXTX_H__ + +#ifdef CONFIG_MLX5_EN_TLS + +#include <linux/skbuff.h> +#include "en.h" + +struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_tx_wqe **wqe, + u16 *pi); + +void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, + u32 *cqe_bcnt); + +#endif /* CONFIG_MLX5_EN_TLS */ + +#endif /* __MLX5E_TLS_RXTX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c new file mode 100644 index 000000000..01468ec27 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/ethtool.h> +#include <net/sock.h> + +#include "en.h" +#include "accel/tls.h" +#include "fpga/sdk.h" +#include "en_accel/tls.h" + +static const struct counter_desc mlx5e_tls_sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_metadata) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_resync_alloc) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_no_sync_data) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) }, +}; + +#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ + atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) + +#define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc) + +int mlx5e_tls_get_count(struct mlx5e_priv *priv) +{ + if (!priv->tls) + return 0; + + return NUM_TLS_SW_COUNTERS; +} + +int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + unsigned int i, idx = 0; + + if (!priv->tls) + return 0; + + for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_tls_sw_stats_desc[i].format); + + return NUM_TLS_SW_COUNTERS; +} + +int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + int i, idx = 0; + + if (!priv->tls) + return 0; + + for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats, + mlx5e_tls_sw_stats_desc, i); + + return NUM_TLS_SW_COUNTERS; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c new file mode 100644 index 000000000..a4be04deb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -0,0 +1,710 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/hash.h> +#include <linux/mlx5/fs.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "en.h" + +struct arfs_tuple { + __be16 etype; + u8 ip_proto; + union { + __be32 src_ipv4; + struct in6_addr src_ipv6; + }; + union { + __be32 dst_ipv4; + struct in6_addr dst_ipv6; + }; + __be16 src_port; + __be16 dst_port; +}; + +struct arfs_rule { + struct mlx5e_priv *priv; + struct work_struct arfs_work; + struct mlx5_flow_handle *rule; + struct hlist_node hlist; + int rxq; + /* Flow ID passed to ndo_rx_flow_steer */ + int flow_id; + /* Filter ID returned by ndo_rx_flow_steer */ + int filter_id; + struct arfs_tuple tuple; +}; + +#define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \ + for (i = 0; i < ARFS_NUM_TYPES; i++) \ + mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j) + +#define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \ + for (j = 0; j < ARFS_HASH_SIZE; j++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist) + +static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) +{ + switch (type) { + case ARFS_IPV4_TCP: + return MLX5E_TT_IPV4_TCP; + case ARFS_IPV4_UDP: + return MLX5E_TT_IPV4_UDP; + case ARFS_IPV6_TCP: + return MLX5E_TT_IPV6_TCP; + case ARFS_IPV6_UDP: + return MLX5E_TT_IPV6_UDP; + default: + return -EINVAL; + } +} + +static int arfs_disable(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5e_tir *tir = priv->indir_tir; + int err = 0; + int tt; + int i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (i = 0; i < ARFS_NUM_TYPES; i++) { + dest.tir_num = tir[i].tirn; + tt = arfs_get_tt(i); + /* Modify ttc rules destination to bypass the aRFS tables*/ + err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], + &dest, NULL); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc destination failed\n", + __func__); + return err; + } + } + return 0; +} + +static void arfs_del_rules(struct mlx5e_priv *priv); + +int mlx5e_arfs_disable(struct mlx5e_priv *priv) +{ + arfs_del_rules(priv); + + return arfs_disable(priv); +} + +int mlx5e_arfs_enable(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest = {}; + int err = 0; + int tt; + int i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + for (i = 0; i < ARFS_NUM_TYPES; i++) { + dest.ft = priv->fs.arfs.arfs_tables[i].ft.t; + tt = arfs_get_tt(i); + /* Modify ttc rules destination to point on the aRFS FTs */ + err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], + &dest, NULL); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc destination failed err=%d\n", + __func__, err); + arfs_disable(priv); + return err; + } + } + return 0; +} + +static void arfs_destroy_table(struct arfs_table *arfs_t) +{ + mlx5_del_flow_rules(arfs_t->default_rule); + mlx5e_destroy_flow_table(&arfs_t->ft); +} + +void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) +{ + int i; + + if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) + return; + + arfs_del_rules(priv); + destroy_workqueue(priv->fs.arfs.wq); + for (i = 0; i < ARFS_NUM_TYPES; i++) { + if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t)) + arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]); + } +} + +static int arfs_add_default_rule(struct mlx5e_priv *priv, + enum arfs_type type) +{ + struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; + struct mlx5e_tir *tir = priv->indir_tir; + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_spec *spec; + enum mlx5e_traffic_types tt; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + tt = arfs_get_tt(type); + if (tt == -EINVAL) { + netdev_err(priv->netdev, "%s: bad arfs_type: %d\n", + __func__, type); + err = -EINVAL; + goto out; + } + + dest.tir_num = tir[tt].tirn; + + arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec, + &flow_act, + &dest, 1); + if (IS_ERR(arfs_t->default_rule)) { + err = PTR_ERR(arfs_t->default_rule); + arfs_t->default_rule = NULL; + netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n", + __func__, type); + } +out: + kvfree(spec); + return err; +} + +#define MLX5E_ARFS_NUM_GROUPS 2 +#define MLX5E_ARFS_GROUP1_SIZE (BIT(16) - 1) +#define MLX5E_ARFS_GROUP2_SIZE BIT(0) +#define MLX5E_ARFS_TABLE_SIZE (MLX5E_ARFS_GROUP1_SIZE +\ + MLX5E_ARFS_GROUP2_SIZE) +static int arfs_create_groups(struct mlx5e_flow_table *ft, + enum arfs_type type) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS, + sizeof(*ft->g), GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in || !ft->g) { + kvfree(ft->g); + kvfree(in); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype); + switch (type) { + case ARFS_IPV4_TCP: + case ARFS_IPV6_TCP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport); + break; + case ARFS_IPV4_UDP: + case ARFS_IPV6_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport); + break; + default: + err = -EINVAL; + goto out; + } + + switch (type) { + case ARFS_IPV4_TCP: + case ARFS_IPV4_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + break; + case ARFS_IPV6_TCP: + case ARFS_IPV6_UDP: + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); + break; + default: + err = -EINVAL; + goto out; + } + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ARFS_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ARFS_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; +out: + kvfree(in); + + return err; +} + +static int arfs_create_table(struct mlx5e_priv *priv, + enum arfs_type type) +{ + struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_ARFS_TABLE_SIZE; + ft_attr.level = MLX5E_ARFS_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = arfs_create_groups(ft, type); + if (err) + goto err; + + err = arfs_add_default_rule(priv, type); + if (err) + goto err; + + return 0; +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) +{ + int err = 0; + int i; + + if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) + return 0; + + spin_lock_init(&priv->fs.arfs.arfs_lock); + INIT_LIST_HEAD(&priv->fs.arfs.rules); + priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs"); + if (!priv->fs.arfs.wq) + return -ENOMEM; + + for (i = 0; i < ARFS_NUM_TYPES; i++) { + err = arfs_create_table(priv, i); + if (err) + goto err; + } + return 0; +err: + mlx5e_arfs_destroy_tables(priv); + return err; +} + +#define MLX5E_ARFS_EXPIRY_QUOTA 60 + +static void arfs_may_expire_flow(struct mlx5e_priv *priv) +{ + struct arfs_rule *arfs_rule; + struct hlist_node *htmp; + int quota = 0; + int i; + int j; + + HLIST_HEAD(del_list); + spin_lock_bh(&priv->fs.arfs.arfs_lock); + mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) { + if (!work_pending(&arfs_rule->arfs_work) && + rps_may_expire_flow(priv->netdev, + arfs_rule->rxq, arfs_rule->flow_id, + arfs_rule->filter_id)) { + hlist_del_init(&arfs_rule->hlist); + hlist_add_head(&arfs_rule->hlist, &del_list); + if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA) + break; + } + } + spin_unlock_bh(&priv->fs.arfs.arfs_lock); + hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) { + if (arfs_rule->rule) + mlx5_del_flow_rules(arfs_rule->rule); + hlist_del(&arfs_rule->hlist); + kfree(arfs_rule); + } +} + +static void arfs_del_rules(struct mlx5e_priv *priv) +{ + struct hlist_node *htmp; + struct arfs_rule *rule; + int i; + int j; + + HLIST_HEAD(del_list); + spin_lock_bh(&priv->fs.arfs.arfs_lock); + mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) { + hlist_del_init(&rule->hlist); + hlist_add_head(&rule->hlist, &del_list); + } + spin_unlock_bh(&priv->fs.arfs.arfs_lock); + + hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) { + cancel_work_sync(&rule->arfs_work); + if (rule->rule) + mlx5_del_flow_rules(rule->rule); + hlist_del(&rule->hlist); + kfree(rule); + } +} + +static struct hlist_head * +arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port, + __be16 dst_port) +{ + unsigned long l; + int bucket_idx; + + l = (__force unsigned long)src_port | + ((__force unsigned long)dst_port << 2); + + bucket_idx = hash_long(l, ARFS_HASH_SHIFT); + + return &arfs_t->rules_hash[bucket_idx]; +} + +static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, + u8 ip_proto, __be16 etype) +{ + if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP) + return &arfs->arfs_tables[ARFS_IPV4_TCP]; + if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP) + return &arfs->arfs_tables[ARFS_IPV4_UDP]; + if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP) + return &arfs->arfs_tables[ARFS_IPV6_TCP]; + if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP) + return &arfs->arfs_tables[ARFS_IPV6_UDP]; + + return NULL; +} + +static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, + struct arfs_rule *arfs_rule) +{ + struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct arfs_tuple *tuple = &arfs_rule->tuple; + struct mlx5_flow_handle *rule = NULL; + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct arfs_table *arfs_table; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, + ntohs(tuple->etype)); + arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype); + if (!arfs_table) { + err = -EINVAL; + goto out; + } + + ft = arfs_table->ft.t; + if (tuple->ip_proto == IPPROTO_TCP) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.tcp_dport); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.tcp_sport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport, + ntohs(tuple->dst_port)); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport, + ntohs(tuple->src_port)); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.udp_dport); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.udp_sport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, + ntohs(tuple->dst_port)); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport, + ntohs(tuple->src_port)); + } + if (tuple->etype == htons(ETH_P_IP)) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), + &tuple->src_ipv4, + 4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &tuple->dst_ipv4, + 4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + &tuple->src_ipv6, + 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &tuple->dst_ipv6, + 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, + 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, + 16); + } + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n", + __func__, arfs_rule->filter_id, arfs_rule->rxq, err); + } + +out: + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static void arfs_modify_rule_rq(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, u16 rxq) +{ + struct mlx5_flow_destination dst = {}; + int err = 0; + + dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst.tir_num = priv->direct_tir[rxq].tirn; + err = mlx5_modify_rule_destination(rule, &dst, NULL); + if (err) + netdev_warn(priv->netdev, + "Failed to modify aRFS rule destination to rq=%d\n", rxq); +} + +static void arfs_handle_work(struct work_struct *work) +{ + struct arfs_rule *arfs_rule = container_of(work, + struct arfs_rule, + arfs_work); + struct mlx5e_priv *priv = arfs_rule->priv; + struct mlx5_flow_handle *rule; + + mutex_lock(&priv->state_lock); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + spin_lock_bh(&priv->fs.arfs.arfs_lock); + hlist_del(&arfs_rule->hlist); + spin_unlock_bh(&priv->fs.arfs.arfs_lock); + + mutex_unlock(&priv->state_lock); + kfree(arfs_rule); + goto out; + } + mutex_unlock(&priv->state_lock); + + if (!arfs_rule->rule) { + rule = arfs_add_rule(priv, arfs_rule); + if (IS_ERR(rule)) + goto out; + arfs_rule->rule = rule; + } else { + arfs_modify_rule_rq(priv, arfs_rule->rule, + arfs_rule->rxq); + } +out: + arfs_may_expire_flow(priv); +} + +static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, + struct arfs_table *arfs_t, + const struct flow_keys *fk, + u16 rxq, u32 flow_id) +{ + struct arfs_rule *rule; + struct arfs_tuple *tuple; + + rule = kzalloc(sizeof(*rule), GFP_ATOMIC); + if (!rule) + return NULL; + + rule->priv = priv; + rule->rxq = rxq; + INIT_WORK(&rule->arfs_work, arfs_handle_work); + + tuple = &rule->tuple; + tuple->etype = fk->basic.n_proto; + tuple->ip_proto = fk->basic.ip_proto; + if (tuple->etype == htons(ETH_P_IP)) { + tuple->src_ipv4 = fk->addrs.v4addrs.src; + tuple->dst_ipv4 = fk->addrs.v4addrs.dst; + } else { + memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src, + sizeof(struct in6_addr)); + memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst, + sizeof(struct in6_addr)); + } + tuple->src_port = fk->ports.src; + tuple->dst_port = fk->ports.dst; + + rule->flow_id = flow_id; + rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER; + + hlist_add_head(&rule->hlist, + arfs_hash_bucket(arfs_t, tuple->src_port, + tuple->dst_port)); + return rule; +} + +static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk) +{ + if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst) + return false; + if (tuple->etype != fk->basic.n_proto) + return false; + if (tuple->etype == htons(ETH_P_IP)) + return tuple->src_ipv4 == fk->addrs.v4addrs.src && + tuple->dst_ipv4 == fk->addrs.v4addrs.dst; + if (tuple->etype == htons(ETH_P_IPV6)) + return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src, + sizeof(struct in6_addr)) && + !memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst, + sizeof(struct in6_addr)); + return false; +} + +static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t, + const struct flow_keys *fk) +{ + struct arfs_rule *arfs_rule; + struct hlist_head *head; + + head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst); + hlist_for_each_entry(arfs_rule, head, hlist) { + if (arfs_cmp(&arfs_rule->tuple, fk)) + return arfs_rule; + } + + return NULL; +} + +int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct arfs_table *arfs_t; + struct arfs_rule *arfs_rule; + struct flow_keys fk; + + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) + return -EPROTONOSUPPORT; + + if (fk.basic.n_proto != htons(ETH_P_IP) && + fk.basic.n_proto != htons(ETH_P_IPV6)) + return -EPROTONOSUPPORT; + + if (skb->encapsulation) + return -EPROTONOSUPPORT; + + arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto); + if (!arfs_t) + return -EPROTONOSUPPORT; + + spin_lock_bh(&arfs->arfs_lock); + arfs_rule = arfs_find_rule(arfs_t, &fk); + if (arfs_rule) { + if (arfs_rule->rxq == rxq_index) { + spin_unlock_bh(&arfs->arfs_lock); + return arfs_rule->filter_id; + } + arfs_rule->rxq = rxq_index; + } else { + arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id); + if (!arfs_rule) { + spin_unlock_bh(&arfs->arfs_lock); + return -ENOMEM; + } + } + queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work); + spin_unlock_bh(&arfs->arfs_lock); + return arfs_rule->filter_id; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c new file mode 100644 index 000000000..124e4567a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +/* mlx5e global resources should be placed in this file. + * Global resources are common to all the netdevices crated on the same nic. + */ + +int mlx5e_create_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir, u32 *in, int inlen) +{ + int err; + + err = mlx5_core_create_tir(mdev, in, inlen, &tir->tirn); + if (err) + return err; + + mutex_lock(&mdev->mlx5e_res.td.list_lock); + list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); + + return 0; +} + +void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir) +{ + mutex_lock(&mdev->mlx5e_res.td.list_lock); + mlx5_core_destroy_tir(mdev, tir->tirn); + list_del(&tir->list); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); +} + +static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + struct mlx5_core_mkey *mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + + kvfree(in); + return err; +} + +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_resources *res = &mdev->mlx5e_res; + int err; + + err = mlx5_core_alloc_pd(mdev, &res->pdn); + if (err) { + mlx5_core_err(mdev, "alloc pd failed, %d\n", err); + return err; + } + + err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn); + if (err) { + mlx5_core_err(mdev, "alloc td failed, %d\n", err); + goto err_dealloc_pd; + } + + err = mlx5e_create_mkey(mdev, res->pdn, &res->mkey); + if (err) { + mlx5_core_err(mdev, "create mkey failed, %d\n", err); + goto err_dealloc_transport_domain; + } + + err = mlx5_alloc_bfreg(mdev, &res->bfreg, false, false); + if (err) { + mlx5_core_err(mdev, "alloc bfreg failed, %d\n", err); + goto err_destroy_mkey; + } + + INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); + mutex_init(&mdev->mlx5e_res.td.list_lock); + + return 0; + +err_destroy_mkey: + mlx5_core_destroy_mkey(mdev, &res->mkey); +err_dealloc_transport_domain: + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); +err_dealloc_pd: + mlx5_core_dealloc_pd(mdev, res->pdn); + return err; +} + +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_resources *res = &mdev->mlx5e_res; + + mlx5_free_bfreg(mdev, &res->bfreg); + mlx5_core_destroy_mkey(mdev, &res->mkey); + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); + mlx5_core_dealloc_pd(mdev, res->pdn); + memset(res, 0, sizeof(*res)); +} + +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_tir *tir; + int err = 0; + u32 tirn = 0; + int inlen; + void *in; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto out; + } + + if (enable_uc_lb) + MLX5_SET(modify_tir_in, in, ctx.self_lb_block, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); + + MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + + mutex_lock(&mdev->mlx5e_res.td.list_lock); + list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { + tirn = tir->tirn; + err = mlx5_core_modify_tir(mdev, tirn, in, inlen); + if (err) + goto out; + } + +out: + kvfree(in); + if (err) + netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); + + return err; +} + +u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev) +{ + u8 min_inline_mode; + + mlx5_query_min_inline(mdev, &min_inline_mode); + if (min_inline_mode == MLX5_INLINE_MODE_NONE && + !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + min_inline_mode = MLX5_INLINE_MODE_L2; + + return min_inline_mode; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c new file mode 100644 index 000000000..722998d68 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -0,0 +1,1206 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/device.h> +#include <linux/netdevice.h> +#include "en.h" +#include "en/port.h" +#include "en/port_buffer.h" + +#define MLX5E_100MB (100000) +#define MLX5E_1GB (1000000) + +#define MLX5E_CEE_STATE_UP 1 +#define MLX5E_CEE_STATE_DOWN 0 + +/* Max supported cable length is 1000 meters */ +#define MLX5E_MAX_CABLE_LENGTH 1000 + +enum { + MLX5E_VENDOR_TC_GROUP_NUM = 7, + MLX5E_LOWEST_PRIO_GROUP = 0, +}; + +#define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg) && \ + MLX5_CAP_QCAM_REG(mdev, qpts) && \ + MLX5_CAP_QCAM_REG(mdev, qpdpm)) + +static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state); +static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio); + +/* If dcbx mode is non-host set the dcbx mode to host. + */ +static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv, + enum mlx5_dcbx_oper_mode mode) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 param[MLX5_ST_SZ_DW(dcbx_param)]; + int err; + + err = mlx5_query_port_dcbx_param(mdev, param); + if (err) + return err; + + MLX5_SET(dcbx_param, param, version_admin, mode); + if (mode != MLX5E_DCBX_PARAM_VER_OPER_HOST) + MLX5_SET(dcbx_param, param, willing_admin, 1); + + return mlx5_set_port_dcbx_param(mdev, param); +} + +static int mlx5e_dcbnl_switch_to_host_mode(struct mlx5e_priv *priv) +{ + struct mlx5e_dcbx *dcbx = &priv->dcbx; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, dcbx)) + return 0; + + if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST) + return 0; + + err = mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_HOST); + if (err) + return err; + + dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_HOST; + return 0; +} + +static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, + struct ieee_ets *ets) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 tc_group[IEEE_8021QAZ_MAX_TCS]; + bool is_tc_group_6_exist = false; + bool is_zero_bw_ets_tc = false; + int err = 0; + int i; + + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return -EOPNOTSUPP; + + ets->ets_cap = mlx5_max_tc(priv->mdev) + 1; + for (i = 0; i < ets->ets_cap; i++) { + err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]); + if (err) + return err; + + err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]); + if (err) + return err; + + err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]); + if (err) + return err; + + if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC && + tc_group[i] == (MLX5E_LOWEST_PRIO_GROUP + 1)) + is_zero_bw_ets_tc = true; + + if (tc_group[i] == (MLX5E_VENDOR_TC_GROUP_NUM - 1)) + is_tc_group_6_exist = true; + } + + /* Report 0% ets tc if exits*/ + if (is_zero_bw_ets_tc) { + for (i = 0; i < ets->ets_cap; i++) + if (tc_group[i] == MLX5E_LOWEST_PRIO_GROUP) + ets->tc_tx_bw[i] = 0; + } + + /* Update tc_tsa based on fw setting*/ + for (i = 0; i < ets->ets_cap; i++) { + if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC) + priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; + else if (tc_group[i] == MLX5E_VENDOR_TC_GROUP_NUM && + !is_tc_group_6_exist) + priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR; + } + memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa)); + + return err; +} + +static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) +{ + bool any_tc_mapped_to_ets = false; + bool ets_zero_bw = false; + int strict_group; + int i; + + for (i = 0; i <= max_tc; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { + any_tc_mapped_to_ets = true; + if (!ets->tc_tx_bw[i]) + ets_zero_bw = true; + } + } + + /* strict group has higher priority than ets group */ + strict_group = MLX5E_LOWEST_PRIO_GROUP; + if (any_tc_mapped_to_ets) + strict_group++; + if (ets_zero_bw) + strict_group++; + + for (i = 0; i <= max_tc; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_VENDOR: + tc_group[i] = MLX5E_VENDOR_TC_GROUP_NUM; + break; + case IEEE_8021QAZ_TSA_STRICT: + tc_group[i] = strict_group++; + break; + case IEEE_8021QAZ_TSA_ETS: + tc_group[i] = MLX5E_LOWEST_PRIO_GROUP; + if (ets->tc_tx_bw[i] && ets_zero_bw) + tc_group[i] = MLX5E_LOWEST_PRIO_GROUP + 1; + break; + } + } +} + +static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, + u8 *tc_group, int max_tc) +{ + int bw_for_ets_zero_bw_tc = 0; + int last_ets_zero_bw_tc = -1; + int num_ets_zero_bw = 0; + int i; + + for (i = 0; i <= max_tc; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS && + !ets->tc_tx_bw[i]) { + num_ets_zero_bw++; + last_ets_zero_bw_tc = i; + } + } + + if (num_ets_zero_bw) + bw_for_ets_zero_bw_tc = MLX5E_MAX_BW_ALLOC / num_ets_zero_bw; + + for (i = 0; i <= max_tc; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_VENDOR: + tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; + break; + case IEEE_8021QAZ_TSA_STRICT: + tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; + break; + case IEEE_8021QAZ_TSA_ETS: + tc_tx_bw[i] = ets->tc_tx_bw[i] ? + ets->tc_tx_bw[i] : + bw_for_ets_zero_bw_tc; + break; + } + } + + /* Make sure the total bw for ets zero bw group is 100% */ + if (last_ets_zero_bw_tc != -1) + tc_tx_bw[last_ets_zero_bw_tc] += + MLX5E_MAX_BW_ALLOC % num_ets_zero_bw; +} + +/* If there are ETS BW 0, + * Set ETS group # to 1 for all ETS non zero BW tcs. Their sum must be 100%. + * Set group #0 to all the ETS BW 0 tcs and + * equally splits the 100% BW between them + * Report both group #0 and #1 as ETS type. + * All the tcs in group #0 will be reported with 0% BW. + */ +int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS]; + u8 tc_group[IEEE_8021QAZ_MAX_TCS]; + int max_tc = mlx5_max_tc(mdev); + int err, i; + + mlx5e_build_tc_group(ets, tc_group, max_tc); + mlx5e_build_tc_tx_bw(ets, tc_tx_bw, tc_group, max_tc); + + err = mlx5_set_port_prio_tc(mdev, ets->prio_tc); + if (err) + return err; + + err = mlx5_set_port_tc_group(mdev, tc_group); + if (err) + return err; + + err = mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw); + + if (err) + return err; + + memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa)); + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + mlx5e_dbg(HW, priv, "%s: prio_%d <=> tc_%d\n", + __func__, i, ets->prio_tc[i]); + mlx5e_dbg(HW, priv, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n", + __func__, i, tc_tx_bw[i], tc_group[i]); + } + + return err; +} + +static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, + struct ieee_ets *ets, + bool zero_sum_allowed) +{ + bool have_ets_tc = false; + int bw_sum = 0; + int i; + + /* Validate Priority */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) { + netdev_err(netdev, + "Failed to validate ETS: priority value greater than max(%d)\n", + MLX5E_MAX_PRIORITY); + return -EINVAL; + } + } + + /* Validate Bandwidth Sum */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { + have_ets_tc = true; + bw_sum += ets->tc_tx_bw[i]; + } + } + + if (have_ets_tc && bw_sum != 100) { + if (bw_sum || (!bw_sum && !zero_sum_allowed)) + netdev_err(netdev, + "Failed to validate ETS: BW sum is illegal\n"); + return -EINVAL; + } + return 0; +} + +static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev, + struct ieee_ets *ets) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return -EOPNOTSUPP; + + err = mlx5e_dbcnl_validate_ets(netdev, ets, false); + if (err) + return err; + + err = mlx5e_dcbnl_ieee_setets_core(priv, ets); + if (err) + return err; + + return 0; +} + +static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + int i; + + pfc->pfc_cap = mlx5_max_tc(mdev) + 1; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + pfc->requests[i] = PPORT_PER_PRIO_GET(pstats, i, tx_pause); + pfc->indications[i] = PPORT_PER_PRIO_GET(pstats, i, rx_pause); + } + + if (MLX5_BUFFER_SUPPORTED(mdev)) + pfc->delay = priv->dcbx.cable_len; + + return mlx5_query_port_pfc(mdev, &pfc->pfc_en, NULL); +} + +static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 old_cable_len = priv->dcbx.cable_len; + struct ieee_pfc pfc_new; + u32 changed = 0; + u8 curr_pfc_en; + int ret = 0; + + /* pfc_en */ + mlx5_query_port_pfc(mdev, &curr_pfc_en, NULL); + if (pfc->pfc_en != curr_pfc_en) { + ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en); + if (ret) + return ret; + mlx5_toggle_port_link(mdev); + changed |= MLX5E_PORT_BUFFER_PFC; + } + + if (pfc->delay && + pfc->delay < MLX5E_MAX_CABLE_LENGTH && + pfc->delay != priv->dcbx.cable_len) { + priv->dcbx.cable_len = pfc->delay; + changed |= MLX5E_PORT_BUFFER_CABLE_LEN; + } + + if (MLX5_BUFFER_SUPPORTED(mdev)) { + pfc_new.pfc_en = (changed & MLX5E_PORT_BUFFER_PFC) ? pfc->pfc_en : curr_pfc_en; + if (priv->dcbx.manual_buffer) + ret = mlx5e_port_manual_buffer_config(priv, changed, + dev->mtu, &pfc_new, + NULL, NULL); + + if (ret && (changed & MLX5E_PORT_BUFFER_CABLE_LEN)) + priv->dcbx.cable_len = old_cable_len; + } + + if (!ret) { + mlx5e_dbg(HW, priv, + "%s: PFC per priority bit mask: 0x%x\n", + __func__, pfc->pfc_en); + } + return ret; +} + +static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return priv->dcbx.cap; +} + +static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_dcbx *dcbx = &priv->dcbx; + + if (mode & DCB_CAP_DCBX_LLD_MANAGED) + return 1; + + if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) { + if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO) + return 0; + + /* set dcbx to fw controlled */ + if (!mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_AUTO)) { + dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO; + dcbx->cap &= ~DCB_CAP_DCBX_HOST; + return 0; + } + + return 1; + } + + if (!(mode & DCB_CAP_DCBX_HOST)) + return 1; + + if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev))) + return 1; + + dcbx->cap = mode; + + return 0; +} + +static int mlx5e_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct dcb_app temp; + bool is_new; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) || + !MLX5_DSCP_SUPPORTED(priv->mdev)) + return -EOPNOTSUPP; + + if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) || + (app->protocol >= MLX5E_MAX_DSCP)) + return -EINVAL; + + /* Save the old entry info */ + temp.selector = IEEE_8021QAZ_APP_SEL_DSCP; + temp.protocol = app->protocol; + temp.priority = priv->dcbx_dp.dscp2prio[app->protocol]; + + /* Check if need to switch to dscp trust state */ + if (!priv->dcbx.dscp_app_cnt) { + err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_DSCP); + if (err) + return err; + } + + /* Skip the fw command if new and old mapping are the same */ + if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) { + err = mlx5e_set_dscp2prio(priv, app->protocol, app->priority); + if (err) + goto fw_err; + } + + /* Delete the old entry if exists */ + is_new = false; + err = dcb_ieee_delapp(dev, &temp); + if (err) + is_new = true; + + /* Add new entry and update counter */ + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + if (is_new) + priv->dcbx.dscp_app_cnt++; + + return err; + +fw_err: + mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + return err; +} + +static int mlx5e_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) || + !MLX5_DSCP_SUPPORTED(priv->mdev)) + return -EOPNOTSUPP; + + if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) || + (app->protocol >= MLX5E_MAX_DSCP)) + return -EINVAL; + + /* Skip if no dscp app entry */ + if (!priv->dcbx.dscp_app_cnt) + return -ENOENT; + + /* Check if the entry matches fw setting */ + if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) + return -ENOENT; + + /* Delete the app entry */ + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + /* Reset the priority mapping back to zero */ + err = mlx5e_set_dscp2prio(priv, app->protocol, 0); + if (err) + goto fw_err; + + priv->dcbx.dscp_app_cnt--; + + /* Check if need to switch to pcp trust state */ + if (!priv->dcbx.dscp_app_cnt) + err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + + return err; + +fw_err: + mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + return err; +} + +static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device *netdev, + struct ieee_maxrate *maxrate) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; + u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; + int err; + int i; + + err = mlx5_query_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); + if (err) + return err; + + memset(maxrate->tc_maxrate, 0, sizeof(maxrate->tc_maxrate)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + switch (max_bw_unit[i]) { + case MLX5_100_MBPS_UNIT: + maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_100MB; + break; + case MLX5_GBPS_UNIT: + maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_1GB; + break; + case MLX5_BW_NO_LIMIT: + break; + default: + WARN(true, "non-supported BW unit"); + break; + } + } + + return 0; +} + +static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, + struct ieee_maxrate *maxrate) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; + u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; + __u64 upper_limit_mbps = roundup(255 * MLX5E_100MB, MLX5E_1GB); + int i; + + memset(max_bw_value, 0, sizeof(max_bw_value)); + memset(max_bw_unit, 0, sizeof(max_bw_unit)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + if (!maxrate->tc_maxrate[i]) { + max_bw_unit[i] = MLX5_BW_NO_LIMIT; + continue; + } + if (maxrate->tc_maxrate[i] < upper_limit_mbps) { + max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], + MLX5E_100MB); + max_bw_value[i] = max_bw_value[i] ? max_bw_value[i] : 1; + max_bw_unit[i] = MLX5_100_MBPS_UNIT; + } else { + max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], + MLX5E_1GB); + max_bw_unit[i] = MLX5_GBPS_UNIT; + } + } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + mlx5e_dbg(HW, priv, "%s: tc_%d <=> max_bw %d Gbps\n", + __func__, i, max_bw_value[i]); + } + + return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); +} + +static u8 mlx5e_dcbnl_setall(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + struct mlx5_core_dev *mdev = priv->mdev; + struct ieee_ets ets; + struct ieee_pfc pfc; + int err = -EOPNOTSUPP; + int i; + + if (!MLX5_CAP_GEN(mdev, ets)) + goto out; + + memset(&ets, 0, sizeof(ets)); + memset(&pfc, 0, sizeof(pfc)); + + ets.ets_cap = IEEE_8021QAZ_MAX_TCS; + for (i = 0; i < CEE_DCBX_MAX_PGS; i++) { + ets.tc_tx_bw[i] = cee_cfg->pg_bw_pct[i]; + ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i]; + ets.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; + ets.prio_tc[i] = cee_cfg->prio_to_pg_map[i]; + mlx5e_dbg(HW, priv, + "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n", + __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i], + ets.prio_tc[i]); + } + + err = mlx5e_dbcnl_validate_ets(netdev, &ets, true); + if (err) + goto out; + + err = mlx5e_dcbnl_ieee_setets_core(priv, &ets); + if (err) { + netdev_err(netdev, + "%s, Failed to set ETS: %d\n", __func__, err); + goto out; + } + + /* Set PFC */ + pfc.pfc_cap = mlx5_max_tc(mdev) + 1; + if (!cee_cfg->pfc_enable) + pfc.pfc_en = 0; + else + for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) + pfc.pfc_en |= cee_cfg->pfc_setting[i] << i; + + err = mlx5e_dcbnl_ieee_setpfc(netdev, &pfc); + if (err) { + netdev_err(netdev, + "%s, Failed to set PFC: %d\n", __func__, err); + goto out; + } +out: + return err ? MLX5_DCB_NO_CHG : MLX5_DCB_CHG_RESET; +} + +static u8 mlx5e_dcbnl_getstate(struct net_device *netdev) +{ + return MLX5E_CEE_STATE_UP; +} + +static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev, + u8 *perm_addr) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (!perm_addr) + return; + + memset(perm_addr, 0xff, MAX_ADDR_LEN); + + mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr); +} + +static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev, + int priority, u8 prio_type, + u8 pgid, u8 bw_pct, u8 up_map) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + + if (priority >= CEE_DCBX_MAX_PRIO) { + netdev_err(netdev, + "%s, priority is out of range\n", __func__); + return; + } + + if (pgid >= CEE_DCBX_MAX_PGS) { + netdev_err(netdev, + "%s, priority group is out of range\n", __func__); + return; + } + + cee_cfg->prio_to_pg_map[priority] = pgid; +} + +static void mlx5e_dcbnl_setpgbwgcfgtx(struct net_device *netdev, + int pgid, u8 bw_pct) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + + if (pgid >= CEE_DCBX_MAX_PGS) { + netdev_err(netdev, + "%s, priority group is out of range\n", __func__); + return; + } + + cee_cfg->pg_bw_pct[pgid] = bw_pct; +} + +static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev, + int priority, u8 *prio_type, + u8 *pgid, u8 *bw_pct, u8 *up_map) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(priv->mdev, ets)) { + netdev_err(netdev, "%s, ets is not supported\n", __func__); + return; + } + + if (priority >= CEE_DCBX_MAX_PRIO) { + netdev_err(netdev, + "%s, priority is out of range\n", __func__); + return; + } + + *prio_type = 0; + *bw_pct = 0; + *up_map = 0; + + if (mlx5_query_port_prio_tc(mdev, priority, pgid)) + *pgid = 0; +} + +static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev, + int pgid, u8 *bw_pct) +{ + struct ieee_ets ets; + + if (pgid >= CEE_DCBX_MAX_PGS) { + netdev_err(netdev, + "%s, priority group is out of range\n", __func__); + return; + } + + mlx5e_dcbnl_ieee_getets(netdev, &ets); + *bw_pct = ets.tc_tx_bw[pgid]; +} + +static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev, + int priority, u8 setting) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + + if (priority >= CEE_DCBX_MAX_PRIO) { + netdev_err(netdev, + "%s, priority is out of range\n", __func__); + return; + } + + if (setting > 1) + return; + + cee_cfg->pfc_setting[priority] = setting; +} + +static int +mlx5e_dcbnl_get_priority_pfc(struct net_device *netdev, + int priority, u8 *setting) +{ + struct ieee_pfc pfc; + int err; + + err = mlx5e_dcbnl_ieee_getpfc(netdev, &pfc); + + if (err) + *setting = 0; + else + *setting = (pfc.pfc_en >> priority) & 0x01; + + return err; +} + +static void mlx5e_dcbnl_getpfccfg(struct net_device *netdev, + int priority, u8 *setting) +{ + if (priority >= CEE_DCBX_MAX_PRIO) { + netdev_err(netdev, + "%s, priority is out of range\n", __func__); + return; + } + + if (!setting) + return; + + mlx5e_dcbnl_get_priority_pfc(netdev, priority, setting); +} + +static u8 mlx5e_dcbnl_getcap(struct net_device *netdev, + int capid, u8 *cap) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 rval = 0; + + switch (capid) { + case DCB_CAP_ATTR_PG: + *cap = true; + break; + case DCB_CAP_ATTR_PFC: + *cap = true; + break; + case DCB_CAP_ATTR_UP2TC: + *cap = false; + break; + case DCB_CAP_ATTR_PG_TCS: + *cap = 1 << mlx5_max_tc(mdev); + break; + case DCB_CAP_ATTR_PFC_TCS: + *cap = 1 << mlx5_max_tc(mdev); + break; + case DCB_CAP_ATTR_GSP: + *cap = false; + break; + case DCB_CAP_ATTR_BCN: + *cap = false; + break; + case DCB_CAP_ATTR_DCBX: + *cap = priv->dcbx.cap | + DCB_CAP_DCBX_VER_CEE | + DCB_CAP_DCBX_VER_IEEE; + break; + default: + *cap = 0; + rval = 1; + break; + } + + return rval; +} + +static int mlx5e_dcbnl_getnumtcs(struct net_device *netdev, + int tcs_id, u8 *num) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + switch (tcs_id) { + case DCB_NUMTCS_ATTR_PG: + case DCB_NUMTCS_ATTR_PFC: + *num = mlx5_max_tc(mdev) + 1; + break; + default: + return -EINVAL; + } + + return 0; +} + +static u8 mlx5e_dcbnl_getpfcstate(struct net_device *netdev) +{ + struct ieee_pfc pfc; + + if (mlx5e_dcbnl_ieee_getpfc(netdev, &pfc)) + return MLX5E_CEE_STATE_DOWN; + + return pfc.pfc_en ? MLX5E_CEE_STATE_UP : MLX5E_CEE_STATE_DOWN; +} + +static void mlx5e_dcbnl_setpfcstate(struct net_device *netdev, u8 state) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + + if ((state != MLX5E_CEE_STATE_UP) && (state != MLX5E_CEE_STATE_DOWN)) + return; + + cee_cfg->pfc_enable = state; +} + +static int mlx5e_dcbnl_getbuffer(struct net_device *dev, + struct dcbnl_buffer *dcb_buffer) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_port_buffer port_buffer; + u8 buffer[MLX5E_MAX_PRIORITY]; + int i, err; + + if (!MLX5_BUFFER_SUPPORTED(mdev)) + return -EOPNOTSUPP; + + err = mlx5e_port_query_priority2buffer(mdev, buffer); + if (err) + return err; + + for (i = 0; i < MLX5E_MAX_PRIORITY; i++) + dcb_buffer->prio2buffer[i] = buffer[i]; + + err = mlx5e_port_query_buffer(priv, &port_buffer); + if (err) + return err; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) + dcb_buffer->buffer_size[i] = port_buffer.buffer[i].size; + dcb_buffer->total_size = port_buffer.port_buffer_size; + + return 0; +} + +static int mlx5e_dcbnl_setbuffer(struct net_device *dev, + struct dcbnl_buffer *dcb_buffer) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_port_buffer port_buffer; + u8 old_prio2buffer[MLX5E_MAX_PRIORITY]; + u32 *buffer_size = NULL; + u8 *prio2buffer = NULL; + u32 changed = 0; + int i, err; + + if (!MLX5_BUFFER_SUPPORTED(mdev)) + return -EOPNOTSUPP; + + for (i = 0; i < DCBX_MAX_BUFFERS; i++) + mlx5_core_dbg(mdev, "buffer[%d]=%d\n", i, dcb_buffer->buffer_size[i]); + + for (i = 0; i < MLX5E_MAX_PRIORITY; i++) + mlx5_core_dbg(mdev, "priority %d buffer%d\n", i, dcb_buffer->prio2buffer[i]); + + err = mlx5e_port_query_priority2buffer(mdev, old_prio2buffer); + if (err) + return err; + + for (i = 0; i < MLX5E_MAX_PRIORITY; i++) { + if (dcb_buffer->prio2buffer[i] != old_prio2buffer[i]) { + changed |= MLX5E_PORT_BUFFER_PRIO2BUFFER; + prio2buffer = dcb_buffer->prio2buffer; + break; + } + } + + err = mlx5e_port_query_buffer(priv, &port_buffer); + if (err) + return err; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + if (port_buffer.buffer[i].size != dcb_buffer->buffer_size[i]) { + changed |= MLX5E_PORT_BUFFER_SIZE; + buffer_size = dcb_buffer->buffer_size; + break; + } + } + + if (!changed) + return 0; + + priv->dcbx.manual_buffer = true; + err = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, NULL, + buffer_size, prio2buffer); + return err; +} + +const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = { + .ieee_getets = mlx5e_dcbnl_ieee_getets, + .ieee_setets = mlx5e_dcbnl_ieee_setets, + .ieee_getmaxrate = mlx5e_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlx5e_dcbnl_ieee_setmaxrate, + .ieee_getpfc = mlx5e_dcbnl_ieee_getpfc, + .ieee_setpfc = mlx5e_dcbnl_ieee_setpfc, + .ieee_setapp = mlx5e_dcbnl_ieee_setapp, + .ieee_delapp = mlx5e_dcbnl_ieee_delapp, + .getdcbx = mlx5e_dcbnl_getdcbx, + .setdcbx = mlx5e_dcbnl_setdcbx, + .dcbnl_getbuffer = mlx5e_dcbnl_getbuffer, + .dcbnl_setbuffer = mlx5e_dcbnl_setbuffer, + +/* CEE interfaces */ + .setall = mlx5e_dcbnl_setall, + .getstate = mlx5e_dcbnl_getstate, + .getpermhwaddr = mlx5e_dcbnl_getpermhwaddr, + + .setpgtccfgtx = mlx5e_dcbnl_setpgtccfgtx, + .setpgbwgcfgtx = mlx5e_dcbnl_setpgbwgcfgtx, + .getpgtccfgtx = mlx5e_dcbnl_getpgtccfgtx, + .getpgbwgcfgtx = mlx5e_dcbnl_getpgbwgcfgtx, + + .setpfccfg = mlx5e_dcbnl_setpfccfg, + .getpfccfg = mlx5e_dcbnl_getpfccfg, + .getcap = mlx5e_dcbnl_getcap, + .getnumtcs = mlx5e_dcbnl_getnumtcs, + .getpfcstate = mlx5e_dcbnl_getpfcstate, + .setpfcstate = mlx5e_dcbnl_setpfcstate, +}; + +static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv, + enum mlx5_dcbx_oper_mode *mode) +{ + u32 out[MLX5_ST_SZ_DW(dcbx_param)]; + + *mode = MLX5E_DCBX_PARAM_VER_OPER_HOST; + + if (!mlx5_query_port_dcbx_param(priv->mdev, out)) + *mode = MLX5_GET(dcbx_param, out, version_oper); + + /* From driver's point of view, we only care if the mode + * is host (HOST) or non-host (AUTO) + */ + if (*mode != MLX5E_DCBX_PARAM_VER_OPER_HOST) + *mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO; +} + +static void mlx5e_ets_init(struct mlx5e_priv *priv) +{ + struct ieee_ets ets; + int err; + int i; + + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return; + + memset(&ets, 0, sizeof(ets)); + ets.ets_cap = mlx5_max_tc(priv->mdev) + 1; + for (i = 0; i < ets.ets_cap; i++) { + ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; + ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR; + ets.prio_tc[i] = i; + } + + if (ets.ets_cap > 1) { + /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */ + ets.prio_tc[0] = 1; + ets.prio_tc[1] = 0; + } + + err = mlx5e_dcbnl_ieee_setets_core(priv, &ets); + if (err) + netdev_err(priv->netdev, + "%s, Failed to init ETS: %d\n", __func__, err); +} + +enum { + INIT, + DELETE, +}; + +static void mlx5e_dcbnl_dscp_app(struct mlx5e_priv *priv, int action) +{ + struct dcb_app temp; + int i; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return; + + if (!MLX5_DSCP_SUPPORTED(priv->mdev)) + return; + + /* No SEL_DSCP entry in non DSCP state */ + if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_DSCP) + return; + + temp.selector = IEEE_8021QAZ_APP_SEL_DSCP; + for (i = 0; i < MLX5E_MAX_DSCP; i++) { + temp.protocol = i; + temp.priority = priv->dcbx_dp.dscp2prio[i]; + if (action == INIT) + dcb_ieee_setapp(priv->netdev, &temp); + else + dcb_ieee_delapp(priv->netdev, &temp); + } + + priv->dcbx.dscp_app_cnt = (action == INIT) ? MLX5E_MAX_DSCP : 0; +} + +void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) +{ + mlx5e_dcbnl_dscp_app(priv, INIT); +} + +void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) +{ + mlx5e_dcbnl_dscp_app(priv, DELETE); +} + +static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv, + struct mlx5e_params *params) +{ + params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(priv->mdev); + if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP && + params->tx_min_inline_mode == MLX5_INLINE_MODE_L2) + params->tx_min_inline_mode = MLX5_INLINE_MODE_IP; +} + +static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv) +{ + struct mlx5e_channels new_channels = {}; + + mutex_lock(&priv->state_lock); + + new_channels.params = priv->channels.params; + mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + + /* Skip if tx_min_inline is the same */ + if (new_channels.params.tx_min_inline_mode == + priv->channels.params.tx_min_inline_mode) + goto out; + + if (mlx5e_open_channels(priv, &new_channels)) + goto out; + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + +out: + mutex_unlock(&priv->state_lock); +} + +static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) +{ + int err; + + err = mlx5_set_trust_state(priv->mdev, trust_state); + if (err) + return err; + priv->dcbx_dp.trust_state = trust_state; + mlx5e_trust_update_sq_inline_mode(priv); + + return err; +} + +static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio) +{ + int err; + + err = mlx5_set_dscp2prio(priv->mdev, dscp, prio); + if (err) + return err; + + priv->dcbx_dp.dscp2prio[dscp] = prio; + return err; +} + +static int mlx5e_trust_initialize(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP; + + if (!MLX5_DSCP_SUPPORTED(mdev)) + return 0; + + err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state); + if (err) + return err; + + mlx5e_trust_update_tx_min_inline_mode(priv, &priv->channels.params); + + err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio); + if (err) + return err; + + return 0; +} + +void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) +{ + struct mlx5e_dcbx *dcbx = &priv->dcbx; + + mlx5e_trust_initialize(priv); + + if (!MLX5_CAP_GEN(priv->mdev, qos)) + return; + + if (MLX5_CAP_GEN(priv->mdev, dcbx)) + mlx5e_dcbnl_query_dcbx_mode(priv, &dcbx->mode); + + priv->dcbx.cap = DCB_CAP_DCBX_VER_CEE | + DCB_CAP_DCBX_VER_IEEE; + if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST) + priv->dcbx.cap |= DCB_CAP_DCBX_HOST; + + priv->dcbx.manual_buffer = false; + priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN; + + mlx5e_ets_init(priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c new file mode 100644 index 000000000..d67adf70a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/net_dim.h> +#include "en.h" + +static void +mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, + struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) +{ + mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); + dim->state = NET_DIM_START_MEASURE; +} + +void mlx5e_rx_dim_work(struct work_struct *work) +{ + struct net_dim *dim = container_of(work, struct net_dim, work); + struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); + struct net_dim_cq_moder cur_moder = + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + + mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); +} + +void mlx5e_tx_dim_work(struct work_struct *work) +{ + struct net_dim *dim = container_of(work, struct net_dim, work); + struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); + struct net_dim_cq_moder cur_moder = + net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + + mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c new file mode 100644 index 000000000..a5c4e4f0f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -0,0 +1,1701 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" +#include "en/port.h" +#include "lib/clock.h" + +void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + strlcpy(drvinfo->driver, DRIVER_NAME, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, DRIVER_VERSION, + sizeof(drvinfo->version)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", + fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), + mdev->board_id); + strlcpy(drvinfo->bus_info, pci_name(mdev->pdev), + sizeof(drvinfo->bus_info)); +} + +static void mlx5e_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_drvinfo(priv, drvinfo); +} + +struct ptys2ethtool_config { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); +}; + +static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER]; + +#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, ...) \ + ({ \ + struct ptys2ethtool_config *cfg; \ + const unsigned int modes[] = { __VA_ARGS__ }; \ + unsigned int i; \ + cfg = &ptys2ethtool_table[reg_]; \ + bitmap_zero(cfg->supported, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + bitmap_zero(cfg->advertised, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \ + __set_bit(modes[i], cfg->supported); \ + __set_bit(modes[i], cfg->advertised); \ + } \ + }) + +void mlx5e_build_ptys2ethtool_map(void) +{ + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); +} + +int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) +{ + int i, num_stats = 0; + + switch (sset) { + case ETH_SS_STATS: + for (i = 0; i < mlx5e_num_stats_grps; i++) + num_stats += mlx5e_stats_grps[i].get_num_stats(priv); + return num_stats; + case ETH_SS_PRIV_FLAGS: + return ARRAY_SIZE(mlx5e_priv_flags); + case ETH_SS_TEST: + return mlx5e_self_test_num(priv); + /* fallthrough */ + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_sset_count(priv, sset); +} + +static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) +{ + int i, idx = 0; + + for (i = 0; i < mlx5e_num_stats_grps; i++) + idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); +} + +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) +{ + int i; + + switch (stringset) { + case ETH_SS_PRIV_FLAGS: + for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++) + strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]); + break; + + case ETH_SS_TEST: + for (i = 0; i < mlx5e_self_test_num(priv); i++) + strcpy(data + i * ETH_GSTRING_LEN, + mlx5e_self_tests[i]); + break; + + case ETH_SS_STATS: + mlx5e_fill_stats_strings(priv, data); + break; + } +} + +static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_strings(priv, stringset, data); +} + +void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, + struct ethtool_stats *stats, u64 *data) +{ + int i, idx = 0; + + mutex_lock(&priv->state_lock); + mlx5e_update_stats(priv); + mutex_unlock(&priv->state_lock); + + for (i = 0; i < mlx5e_num_stats_grps; i++) + idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); +} + +static void mlx5e_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); +} + +void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param) +{ + param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; + param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames; + param->tx_pending = 1 << priv->channels.params.log_sq_size; +} + +static void mlx5e_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ringparam(priv, param); +} + +int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param) +{ + struct mlx5e_channels new_channels = {}; + u8 log_rq_size; + u8 log_sq_size; + int err = 0; + + if (param->rx_jumbo_pending) { + netdev_info(priv->netdev, "%s: rx_jumbo_pending not supported\n", + __func__); + return -EINVAL; + } + if (param->rx_mini_pending) { + netdev_info(priv->netdev, "%s: rx_mini_pending not supported\n", + __func__); + return -EINVAL; + } + + if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { + netdev_info(priv->netdev, "%s: rx_pending (%d) < min (%d)\n", + __func__, param->rx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); + return -EINVAL; + } + + if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { + netdev_info(priv->netdev, "%s: tx_pending (%d) < min (%d)\n", + __func__, param->tx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); + return -EINVAL; + } + + log_rq_size = order_base_2(param->rx_pending); + log_sq_size = order_base_2(param->tx_pending); + + if (log_rq_size == priv->channels.params.log_rq_mtu_frames && + log_sq_size == priv->channels.params.log_sq_size) + return 0; + + mutex_lock(&priv->state_lock); + + new_channels.params = priv->channels.params; + new_channels.params.log_rq_mtu_frames = log_rq_size; + new_channels.params.log_sq_size = log_sq_size; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto unlock; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto unlock; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + +unlock: + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_ringparam(priv, param); +} + +void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch) +{ + ch->max_combined = priv->profile->max_nch(priv->mdev); + ch->combined_count = priv->channels.params.num_channels; +} + +static void mlx5e_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch) +{ + unsigned int count = ch->combined_count; + struct mlx5e_channels new_channels = {}; + bool arfs_enabled; + int err = 0; + + if (!count) { + netdev_info(priv->netdev, "%s: combined_count=0 not supported\n", + __func__); + return -EINVAL; + } + + if (priv->channels.params.num_channels == count) + return 0; + + mutex_lock(&priv->state_lock); + + new_channels.params = priv->channels.params; + new_channels.params.num_channels = count; + if (!netif_is_rxfh_configured(priv->netdev)) + mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + + /* Create fresh channels with new parameters */ + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + + arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE; + if (arfs_enabled) + mlx5e_arfs_disable(priv); + + /* Switch to new channels, set new parameters and close old ones */ + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + + if (arfs_enabled) { + err = mlx5e_arfs_enable(priv); + if (err) + netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n", + __func__, err); + } + +out: + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_channels(priv, ch); +} + +int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal) +{ + struct net_dim_cq_moder *rx_moder, *tx_moder; + + if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) + return -EOPNOTSUPP; + + rx_moder = &priv->channels.params.rx_cq_moderation; + coal->rx_coalesce_usecs = rx_moder->usec; + coal->rx_max_coalesced_frames = rx_moder->pkts; + coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled; + + tx_moder = &priv->channels.params.tx_cq_moderation; + coal->tx_coalesce_usecs = tx_moder->usec; + coal->tx_max_coalesced_frames = tx_moder->pkts; + coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled; + + return 0; +} + +static int mlx5e_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal); +} + +#define MLX5E_MAX_COAL_TIME MLX5_MAX_CQ_PERIOD +#define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT + +static void +mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int tc; + int i; + + for (i = 0; i < priv->channels.num; ++i) { + struct mlx5e_channel *c = priv->channels.c[i]; + + for (tc = 0; tc < c->num_tc; tc++) { + mlx5_core_modify_cq_moderation(mdev, + &c->sq[tc].cq.mcq, + coal->tx_coalesce_usecs, + coal->tx_max_coalesced_frames); + } + + mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq, + coal->rx_coalesce_usecs, + coal->rx_max_coalesced_frames); + } +} + +int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal) +{ + struct net_dim_cq_moder *rx_moder, *tx_moder; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; + int err = 0; + bool reset; + + if (!MLX5_CAP_GEN(mdev, cq_moderation)) + return -EOPNOTSUPP; + + if (coal->tx_coalesce_usecs > MLX5E_MAX_COAL_TIME || + coal->rx_coalesce_usecs > MLX5E_MAX_COAL_TIME) { + netdev_info(priv->netdev, "%s: maximum coalesce time supported is %lu usecs\n", + __func__, MLX5E_MAX_COAL_TIME); + return -ERANGE; + } + + if (coal->tx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES || + coal->rx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES) { + netdev_info(priv->netdev, "%s: maximum coalesced frames supported is %lu\n", + __func__, MLX5E_MAX_COAL_FRAMES); + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + new_channels.params = priv->channels.params; + + rx_moder = &new_channels.params.rx_cq_moderation; + rx_moder->usec = coal->rx_coalesce_usecs; + rx_moder->pkts = coal->rx_max_coalesced_frames; + new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; + + tx_moder = &new_channels.params.tx_cq_moderation; + tx_moder->usec = coal->tx_coalesce_usecs; + tx_moder->pkts = coal->tx_max_coalesced_frames; + new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + /* we are opened */ + + reset = (!!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled) || + (!!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled); + + if (!reset) { + mlx5e_set_priv_channels_coalesce(priv, coal); + priv->channels.params = new_channels.params; + goto out; + } + + /* open fresh channels with new coal parameters */ + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + +out: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal); +} + +static void ptys2ethtool_supported_link(unsigned long *supported_modes, + u32 eth_proto_cap) +{ + unsigned long proto_cap = eth_proto_cap; + int proto; + + for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) + bitmap_or(supported_modes, supported_modes, + ptys2ethtool_table[proto].supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static void ptys2ethtool_adver_link(unsigned long *advertising_modes, + u32 eth_proto_cap) +{ + unsigned long proto_cap = eth_proto_cap; + int proto; + + for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) + bitmap_or(advertising_modes, advertising_modes, + ptys2ethtool_table[proto].advertised, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings, + u32 eth_proto_cap, + u8 connector_type) +{ + if (!connector_type || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) { + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, + FIBRE); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, + FIBRE); + } + + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, + Backplane); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, + Backplane); + } + return; + } + + switch (connector_type) { + case MLX5E_PORT_TP: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, TP); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, TP); + break; + case MLX5E_PORT_AUI: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, AUI); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, AUI); + break; + case MLX5E_PORT_BNC: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, BNC); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, BNC); + break; + case MLX5E_PORT_MII: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, MII); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, MII); + break; + case MLX5E_PORT_FIBRE: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, FIBRE); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, FIBRE); + break; + case MLX5E_PORT_DA: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Backplane); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Backplane); + break; + case MLX5E_PORT_NONE: + case MLX5E_PORT_OTHER: + default: + break; + } +} + +static void get_speed_duplex(struct net_device *netdev, + u32 eth_proto_oper, + struct ethtool_link_ksettings *link_ksettings) +{ + u32 speed = SPEED_UNKNOWN; + u8 duplex = DUPLEX_UNKNOWN; + + if (!netif_carrier_ok(netdev)) + goto out; + + speed = mlx5e_port_ptys2speed(eth_proto_oper); + if (!speed) { + speed = SPEED_UNKNOWN; + goto out; + } + + duplex = DUPLEX_FULL; + +out: + link_ksettings->base.speed = speed; + link_ksettings->base.duplex = duplex; +} + +static void get_supported(u32 eth_proto_cap, + struct ethtool_link_ksettings *link_ksettings) +{ + unsigned long *supported = link_ksettings->link_modes.supported; + + ptys2ethtool_supported_link(supported, eth_proto_cap); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); +} + +static void get_advertising(u32 eth_proto_cap, u8 tx_pause, + u8 rx_pause, + struct ethtool_link_ksettings *link_ksettings) +{ + unsigned long *advertising = link_ksettings->link_modes.advertising; + + ptys2ethtool_adver_link(advertising, eth_proto_cap); + if (rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); + if (tx_pause ^ rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause); +} + +static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = { + [MLX5E_PORT_UNKNOWN] = PORT_OTHER, + [MLX5E_PORT_NONE] = PORT_NONE, + [MLX5E_PORT_TP] = PORT_TP, + [MLX5E_PORT_AUI] = PORT_AUI, + [MLX5E_PORT_BNC] = PORT_BNC, + [MLX5E_PORT_MII] = PORT_MII, + [MLX5E_PORT_FIBRE] = PORT_FIBRE, + [MLX5E_PORT_DA] = PORT_DA, + [MLX5E_PORT_OTHER] = PORT_OTHER, + }; + +static u8 get_connector_port(u32 eth_proto, u8 connector_type) +{ + if (connector_type && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER) + return ptys2connector_type[connector_type]; + + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) | + MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) | + MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) | + MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + return PORT_FIBRE; + } + + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) | + MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | + MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) { + return PORT_DA; + } + + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) | + MLX5E_PROT_MASK(MLX5E_10GBASE_KR) | + MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) | + MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) { + return PORT_NONE; + } + + return PORT_OTHER; +} + +static void get_lp_advertising(u32 eth_proto_lp, + struct ethtool_link_ksettings *link_ksettings) +{ + unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; + + ptys2ethtool_adver_link(lp_advertising, eth_proto_lp); +} + +static int mlx5e_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; + u32 rx_pause = 0; + u32 tx_pause = 0; + u32 eth_proto_cap; + u32 eth_proto_admin; + u32 eth_proto_lp; + u32 eth_proto_oper; + u8 an_disable_admin; + u8 an_status; + u8 connector_type; + int err; + + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); + if (err) { + netdev_err(netdev, "%s: query port ptys failed: %d\n", + __func__, err); + goto err_query_ptys; + } + + eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); + an_status = MLX5_GET(ptys_reg, out, an_status); + connector_type = MLX5_GET(ptys_reg, out, connector_type); + + mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + + get_supported(eth_proto_cap, link_ksettings); + get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings); + get_speed_duplex(netdev, eth_proto_oper, link_ksettings); + + eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; + + link_ksettings->base.port = get_connector_port(eth_proto_oper, + connector_type); + ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin, + connector_type); + get_lp_advertising(eth_proto_lp, link_ksettings); + + if (an_status == MLX5_AN_COMPLETE) + ethtool_link_ksettings_add_link_mode(link_ksettings, + lp_advertising, Autoneg); + + link_ksettings->base.autoneg = an_disable_admin ? AUTONEG_DISABLE : + AUTONEG_ENABLE; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + Autoneg); + if (!an_disable_admin) + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Autoneg); + +err_query_ptys: + return err; +} + +static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) +{ + u32 i, ptys_modes = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (bitmap_intersects(ptys2ethtool_table[i].advertised, + link_modes, + __ETHTOOL_LINK_MODE_MASK_NBITS)) + ptys_modes |= MLX5E_PROT_MASK(i); + } + + return ptys_modes; +} + +static int mlx5e_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 eth_proto_cap, eth_proto_admin; + bool an_changes = false; + u8 an_disable_admin; + u8 an_disable_cap; + bool an_disable; + u32 link_modes; + u8 an_status; + u32 speed; + int err; + + speed = link_ksettings->base.speed; + + link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? + mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : + mlx5e_port_speed2linkmodes(speed); + + err = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); + if (err) { + netdev_err(netdev, "%s: query port eth proto cap failed: %d\n", + __func__, err); + goto out; + } + + link_modes = link_modes & eth_proto_cap; + if (!link_modes) { + netdev_err(netdev, "%s: Not supported link mode(s) requested", + __func__); + err = -EINVAL; + goto out; + } + + err = mlx5_query_port_proto_admin(mdev, ð_proto_admin, MLX5_PTYS_EN); + if (err) { + netdev_err(netdev, "%s: query port eth proto admin failed: %d\n", + __func__, err); + goto out; + } + + mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status, + &an_disable_cap, &an_disable_admin); + + an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE; + an_changes = ((!an_disable && an_disable_admin) || + (an_disable && !an_disable_admin)); + + if (!an_changes && link_modes == eth_proto_admin) + goto out; + + mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN); + mlx5_toggle_port_link(mdev); + +out: + return err; +} + +static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return sizeof(priv->channels.params.toeplitz_hash_key); +} + +static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) +{ + return MLX5E_INDIR_RQT_SIZE; +} + +static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (indir) + memcpy(indir, priv->channels.params.indirection_rqt, + sizeof(priv->channels.params.indirection_rqt)); + + if (key) + memcpy(key, priv->channels.params.toeplitz_hash_key, + sizeof(priv->channels.params.toeplitz_hash_key)); + + if (hfunc) + *hfunc = priv->channels.params.rss_hfunc; + + return 0; +} + +static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) +{ + void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); + struct mlx5_core_dev *mdev = priv->mdev; + int ctxlen = MLX5_ST_SZ_BYTES(tirc); + int tt; + + MLX5_SET(modify_tir_in, in, bitmask.hash, 1); + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); + mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); + } + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); + mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen); + } +} + +static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + bool hash_changed = false; + void *in; + + if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && + (hfunc != ETH_RSS_HASH_XOR) && + (hfunc != ETH_RSS_HASH_TOP)) + return -EINVAL; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mutex_lock(&priv->state_lock); + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && + hfunc != priv->channels.params.rss_hfunc) { + priv->channels.params.rss_hfunc = hfunc; + hash_changed = true; + } + + if (indir) { + memcpy(priv->channels.params.indirection_rqt, indir, + sizeof(priv->channels.params.indirection_rqt)); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + u32 rqtn = priv->indir_rqt.rqtn; + struct mlx5e_redirect_rqt_param rrp = { + .is_rss = true, + { + .rss = { + .hfunc = priv->channels.params.rss_hfunc, + .channels = &priv->channels, + }, + }, + }; + + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); + } + } + + if (key) { + memcpy(priv->channels.params.toeplitz_hash_key, key, + sizeof(priv->channels.params.toeplitz_hash_key)); + hash_changed = hash_changed || + priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP; + } + + if (hash_changed) + mlx5e_modify_tirs_hash(priv, in, inlen); + + mutex_unlock(&priv->state_lock); + + kvfree(in); + + return 0; +} + +#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100 +#define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC 8000 +#define MLX5E_PFC_PREVEN_MINOR_PRECENT 85 +#define MLX5E_PFC_PREVEN_TOUT_MIN_MSEC 80 +#define MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout) \ + max_t(u16, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, \ + (critical_tout * MLX5E_PFC_PREVEN_MINOR_PRECENT) / 100) + +static int mlx5e_get_pfc_prevention_tout(struct net_device *netdev, + u16 *pfc_prevention_tout) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) || + !MLX5_CAP_DEBUG((priv)->mdev, stall_detect)) + return -EOPNOTSUPP; + + return mlx5_query_port_stall_watermark(mdev, pfc_prevention_tout, NULL); +} + +static int mlx5e_set_pfc_prevention_tout(struct net_device *netdev, + u16 pfc_preven) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 critical_tout; + u16 minor; + + if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) || + !MLX5_CAP_DEBUG((priv)->mdev, stall_detect)) + return -EOPNOTSUPP; + + critical_tout = (pfc_preven == PFC_STORM_PREVENTION_AUTO) ? + MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC : + pfc_preven; + + if (critical_tout != PFC_STORM_PREVENTION_DISABLE && + (critical_tout > MLX5E_PFC_PREVEN_TOUT_MAX_MSEC || + critical_tout < MLX5E_PFC_PREVEN_TOUT_MIN_MSEC)) { + netdev_info(netdev, "%s: pfc prevention tout not in range (%d-%d)\n", + __func__, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, + MLX5E_PFC_PREVEN_TOUT_MAX_MSEC); + return -EINVAL; + } + + minor = MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout); + return mlx5_set_port_stall_watermark(mdev, critical_tout, + minor); +} + +static int mlx5e_get_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + void *data) +{ + int err; + + switch (tuna->id) { + case ETHTOOL_PFC_PREVENTION_TOUT: + err = mlx5e_get_pfc_prevention_tout(dev, data); + break; + default: + err = -EINVAL; + break; + } + + return err; +} + +static int mlx5e_set_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + + switch (tuna->id) { + case ETHTOOL_PFC_PREVENTION_TOUT: + err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data); + break; + default: + err = -EINVAL; + break; + } + + mutex_unlock(&priv->state_lock); + return err; +} + +static void mlx5e_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause, + &pauseparam->tx_pause); + if (err) { + netdev_err(netdev, "%s: mlx5_query_port_pause failed:0x%x\n", + __func__, err); + } +} + +static int mlx5e_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EOPNOTSUPP; + + if (pauseparam->autoneg) + return -EINVAL; + + err = mlx5_set_port_pause(mdev, + pauseparam->rx_pause ? 1 : 0, + pauseparam->tx_pause ? 1 : 0); + if (err) { + netdev_err(netdev, "%s: mlx5_set_port_pause failed:0x%x\n", + __func__, err); + } + + return err; +} + +int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, + struct ethtool_ts_info *info) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + info->phc_index = mlx5_clock_get_ptp_index(mdev); + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || + info->phc_index == -1) + return 0; + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} + +static int mlx5e_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_ts_info(priv, info); +} + +static __u32 mlx5e_get_wol_supported(struct mlx5_core_dev *mdev) +{ + __u32 ret = 0; + + if (MLX5_CAP_GEN(mdev, wol_g)) + ret |= WAKE_MAGIC; + + if (MLX5_CAP_GEN(mdev, wol_s)) + ret |= WAKE_MAGICSECURE; + + if (MLX5_CAP_GEN(mdev, wol_a)) + ret |= WAKE_ARP; + + if (MLX5_CAP_GEN(mdev, wol_b)) + ret |= WAKE_BCAST; + + if (MLX5_CAP_GEN(mdev, wol_m)) + ret |= WAKE_MCAST; + + if (MLX5_CAP_GEN(mdev, wol_u)) + ret |= WAKE_UCAST; + + if (MLX5_CAP_GEN(mdev, wol_p)) + ret |= WAKE_PHY; + + return ret; +} + +static __u32 mlx5e_refomrat_wol_mode_mlx5_to_linux(u8 mode) +{ + __u32 ret = 0; + + if (mode & MLX5_WOL_MAGIC) + ret |= WAKE_MAGIC; + + if (mode & MLX5_WOL_SECURED_MAGIC) + ret |= WAKE_MAGICSECURE; + + if (mode & MLX5_WOL_ARP) + ret |= WAKE_ARP; + + if (mode & MLX5_WOL_BROADCAST) + ret |= WAKE_BCAST; + + if (mode & MLX5_WOL_MULTICAST) + ret |= WAKE_MCAST; + + if (mode & MLX5_WOL_UNICAST) + ret |= WAKE_UCAST; + + if (mode & MLX5_WOL_PHY_ACTIVITY) + ret |= WAKE_PHY; + + return ret; +} + +static u8 mlx5e_refomrat_wol_mode_linux_to_mlx5(__u32 mode) +{ + u8 ret = 0; + + if (mode & WAKE_MAGIC) + ret |= MLX5_WOL_MAGIC; + + if (mode & WAKE_MAGICSECURE) + ret |= MLX5_WOL_SECURED_MAGIC; + + if (mode & WAKE_ARP) + ret |= MLX5_WOL_ARP; + + if (mode & WAKE_BCAST) + ret |= MLX5_WOL_BROADCAST; + + if (mode & WAKE_MCAST) + ret |= MLX5_WOL_MULTICAST; + + if (mode & WAKE_UCAST) + ret |= MLX5_WOL_UNICAST; + + if (mode & WAKE_PHY) + ret |= MLX5_WOL_PHY_ACTIVITY; + + return ret; +} + +static void mlx5e_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 mlx5_wol_mode; + int err; + + memset(wol, 0, sizeof(*wol)); + + wol->supported = mlx5e_get_wol_supported(mdev); + if (!wol->supported) + return; + + err = mlx5_query_port_wol(mdev, &mlx5_wol_mode); + if (err) + return; + + wol->wolopts = mlx5e_refomrat_wol_mode_mlx5_to_linux(mlx5_wol_mode); +} + +static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + __u32 wol_supported = mlx5e_get_wol_supported(mdev); + u32 mlx5_wol_mode; + + if (!wol_supported) + return -EOPNOTSUPP; + + if (wol->wolopts & ~wol_supported) + return -EINVAL; + + mlx5_wol_mode = mlx5e_refomrat_wol_mode_linux_to_mlx5(wol->wolopts); + + return mlx5_set_port_wol(mdev, mlx5_wol_mode); +} + +static u32 mlx5e_get_msglevel(struct net_device *dev) +{ + return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel; +} + +static void mlx5e_set_msglevel(struct net_device *dev, u32 val) +{ + ((struct mlx5e_priv *)netdev_priv(dev))->msglevel = val; +} + +static int mlx5e_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 beacon_duration; + + if (!MLX5_CAP_GEN(mdev, beacon_led)) + return -EOPNOTSUPP; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + beacon_duration = MLX5_BEACON_DURATION_INF; + break; + case ETHTOOL_ID_INACTIVE: + beacon_duration = MLX5_BEACON_DURATION_OFF; + break; + default: + return -EOPNOTSUPP; + } + + return mlx5_set_port_beacon(mdev, beacon_duration); +} + +static int mlx5e_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *dev = priv->mdev; + int size_read = 0; + u8 data[4]; + + size_read = mlx5_query_module_eeprom(dev, 0, 2, data); + if (size_read < 2) + return -EIO; + + /* data[0] = identifier byte */ + switch (data[0]) { + case MLX5_MODULE_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + case MLX5_MODULE_ID_QSFP_PLUS: + case MLX5_MODULE_ID_QSFP28: + /* data[1] = revision id */ + if (data[0] == MLX5_MODULE_ID_QSFP28 || data[1] >= 0x3) { + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + } + break; + case MLX5_MODULE_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", + __func__, data[0]); + return -EINVAL; + } + + return 0; +} + +static int mlx5e_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int offset = ee->offset; + int size_read; + int i = 0; + + if (!ee->len) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + size_read = mlx5_query_module_eeprom(mdev, offset, ee->len - i, + data + i); + + if (!size_read) + /* Done reading */ + return 0; + + if (size_read < 0) { + netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n", + __func__, size_read); + return size_read; + } + + i += size_read; + offset += size_read; + } + + return 0; +} + +typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); + +static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, + bool is_rx_cq) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; + bool mode_changed; + u8 cq_period_mode, current_cq_period_mode; + int err = 0; + + cq_period_mode = enable ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + current_cq_period_mode = is_rx_cq ? + priv->channels.params.rx_cq_moderation.cq_period_mode : + priv->channels.params.tx_cq_moderation.cq_period_mode; + mode_changed = cq_period_mode != current_cq_period_mode; + + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && + !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) + return -EOPNOTSUPP; + + if (!mode_changed) + return 0; + + new_channels.params = priv->channels.params; + if (is_rx_cq) + mlx5e_set_rx_cq_mode_params(&new_channels.params, cq_period_mode); + else + mlx5e_set_tx_cq_mode_params(&new_channels.params, cq_period_mode); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + return err; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + return 0; +} + +static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) +{ + return set_pflag_cqe_based_moder(netdev, enable, false); +} + +static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) +{ + return set_pflag_cqe_based_moder(netdev, enable, true); +} + +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val) +{ + bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS); + struct mlx5e_channels new_channels = {}; + int err = 0; + + if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) + return new_val ? -EOPNOTSUPP : 0; + + if (curr_val == new_val) + return 0; + + new_channels.params = priv->channels.params; + MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + return err; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n", + MLX5E_GET_PFLAG(&priv->channels.params, + MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF"); + + return 0; +} + +static int set_pflag_rx_cqe_compress(struct net_device *netdev, + bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (!MLX5_CAP_GEN(mdev, cqe_compression)) + return -EOPNOTSUPP; + + if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) { + netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n"); + return -EINVAL; + } + + err = mlx5e_modify_rx_cqe_compression_locked(priv, enable); + if (err) + return err; + + priv->channels.params.rx_cqe_compress_def = enable; + + return 0; +} + +static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; + int err; + + if (enable) { + if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) + return -EOPNOTSUPP; + if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params)) + return -EINVAL; + } else if (priv->channels.params.lro_en) { + netdev_warn(netdev, "Can't set legacy RQ with LRO, disable LRO first\n"); + return -EINVAL; + } + + new_channels.params = priv->channels.params; + + MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable); + mlx5e_set_rq_type(mdev, &new_channels.params); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + return err; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + return 0; +} + +static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels *channels = &priv->channels; + struct mlx5e_channel *c; + int i; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || + priv->channels.params.xdp_prog) + return 0; + + for (i = 0; i < channels->num; i++) { + c = channels->c[i]; + if (enable) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + else + __clear_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + } + + return 0; +} + +static int mlx5e_handle_pflag(struct net_device *netdev, + u32 wanted_flags, + enum mlx5e_priv_flag flag, + mlx5e_pflag_handler pflag_handler) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + bool enable = !!(wanted_flags & flag); + u32 changes = wanted_flags ^ priv->channels.params.pflags; + int err; + + if (!(changes & flag)) + return 0; + + err = pflag_handler(netdev, enable); + if (err) { + netdev_err(netdev, "%s private flag 0x%x failed err %d\n", + enable ? "Enable" : "Disable", flag, err); + return err; + } + + MLX5E_SET_PFLAG(&priv->channels.params, flag, enable); + return 0; +} + +static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_RX_CQE_BASED_MODER, + set_pflag_rx_cqe_based_moder); + if (err) + goto out; + + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_TX_CQE_BASED_MODER, + set_pflag_tx_cqe_based_moder); + if (err) + goto out; + + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_RX_CQE_COMPRESS, + set_pflag_rx_cqe_compress); + if (err) + goto out; + + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_RX_STRIDING_RQ, + set_pflag_rx_striding_rq); + if (err) + goto out; + + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, + set_pflag_rx_no_csum_complete); + +out: + mutex_unlock(&priv->state_lock); + + /* Need to fix some features.. */ + netdev_update_features(netdev); + + return err; +} + +static u32 mlx5e_get_priv_flags(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return priv->channels.params.pflags; +} + +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *dev = priv->netdev; + const struct firmware *fw; + int err; + + if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, flash->data, &dev->dev); + if (err) + return err; + + dev_hold(dev); + rtnl_unlock(); + + err = mlx5_firmware_flash(mdev, fw); + release_firmware(fw); + + rtnl_lock(); + dev_put(dev); + return err; +} + +static int mlx5e_flash_device(struct net_device *dev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + +#ifndef CONFIG_MLX5_EN_RXNFC +/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS + * otherwise this function will be defined from en_fs_ethtool.c + */ +static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (info->cmd != ETHTOOL_GRXRINGS) + return -EOPNOTSUPP; + /* ring_count is needed by ethtool -x */ + info->data = priv->channels.params.num_channels; + return 0; +} +#endif + +const struct ethtool_ops mlx5e_ethtool_ops = { + .get_drvinfo = mlx5e_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_get_strings, + .get_sset_count = mlx5e_get_sset_count, + .get_ethtool_stats = mlx5e_get_ethtool_stats, + .get_ringparam = mlx5e_get_ringparam, + .set_ringparam = mlx5e_set_ringparam, + .get_channels = mlx5e_get_channels, + .set_channels = mlx5e_set_channels, + .get_coalesce = mlx5e_get_coalesce, + .set_coalesce = mlx5e_set_coalesce, + .get_link_ksettings = mlx5e_get_link_ksettings, + .set_link_ksettings = mlx5e_set_link_ksettings, + .get_rxfh_key_size = mlx5e_get_rxfh_key_size, + .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, + .get_rxfh = mlx5e_get_rxfh, + .set_rxfh = mlx5e_set_rxfh, + .get_rxnfc = mlx5e_get_rxnfc, +#ifdef CONFIG_MLX5_EN_RXNFC + .set_rxnfc = mlx5e_set_rxnfc, +#endif + .flash_device = mlx5e_flash_device, + .get_tunable = mlx5e_get_tunable, + .set_tunable = mlx5e_set_tunable, + .get_pauseparam = mlx5e_get_pauseparam, + .set_pauseparam = mlx5e_set_pauseparam, + .get_ts_info = mlx5e_get_ts_info, + .set_phys_id = mlx5e_set_phys_id, + .get_wol = mlx5e_get_wol, + .set_wol = mlx5e_set_wol, + .get_module_info = mlx5e_get_module_info, + .get_module_eeprom = mlx5e_get_module_eeprom, + .get_priv_flags = mlx5e_get_priv_flags, + .set_priv_flags = mlx5e_set_priv_flags, + .self_test = mlx5e_self_test, + .get_msglevel = mlx5e_get_msglevel, + .set_msglevel = mlx5e_set_msglevel, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c new file mode 100644 index 000000000..c6eea6b6b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -0,0 +1,1576 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/list.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/mlx5/fs.h> +#include "en.h" +#include "lib/mpfs.h" + +static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai, int type); +static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai); + +enum { + MLX5E_FULLMATCH = 0, + MLX5E_ALLMULTI = 1, + MLX5E_PROMISC = 2, +}; + +enum { + MLX5E_UC = 0, + MLX5E_MC_IPV4 = 1, + MLX5E_MC_IPV6 = 2, + MLX5E_MC_OTHER = 3, +}; + +enum { + MLX5E_ACTION_NONE = 0, + MLX5E_ACTION_ADD = 1, + MLX5E_ACTION_DEL = 2, +}; + +struct mlx5e_l2_hash_node { + struct hlist_node hlist; + u8 action; + struct mlx5e_l2_rule ai; + bool mpfs; +}; + +static inline int mlx5e_hash_l2(u8 *addr) +{ + return addr[5]; +} + +static void mlx5e_add_l2_to_hash(struct hlist_head *hash, u8 *addr) +{ + struct mlx5e_l2_hash_node *hn; + int ix = mlx5e_hash_l2(addr); + int found = 0; + + hlist_for_each_entry(hn, &hash[ix], hlist) + if (ether_addr_equal_64bits(hn->ai.addr, addr)) { + found = 1; + break; + } + + if (found) { + hn->action = MLX5E_ACTION_NONE; + return; + } + + hn = kzalloc(sizeof(*hn), GFP_ATOMIC); + if (!hn) + return; + + ether_addr_copy(hn->ai.addr, addr); + hn->action = MLX5E_ACTION_ADD; + + hlist_add_head(&hn->hlist, &hash[ix]); +} + +static void mlx5e_del_l2_from_hash(struct mlx5e_l2_hash_node *hn) +{ + hlist_del(&hn->hlist); + kfree(hn); +} + +static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) +{ + struct net_device *ndev = priv->netdev; + int max_list_size; + int list_size; + u16 *vlans; + int vlan; + int err; + int i; + + list_size = 0; + for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) + list_size++; + + max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); + + if (list_size > max_list_size) { + netdev_warn(ndev, + "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", + list_size, max_list_size); + list_size = max_list_size; + } + + vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL); + if (!vlans) + return -ENOMEM; + + i = 0; + for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) { + if (i >= list_size) + break; + vlans[i++] = vlan; + } + + err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size); + if (err) + netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n", + err); + + kfree(vlans); + return err; +} + +enum mlx5e_vlan_rule_type { + MLX5E_VLAN_RULE_TYPE_UNTAGGED, + MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, + MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, +}; + +static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, + u16 vid, struct mlx5_flow_spec *spec) +{ + struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle **rule_p; + MLX5_DECLARE_FLOW_ACT(flow_act); + int err = 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.l2.ft.t; + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + /* cvlan_tag enabled in match criteria and + * disabled in match value means both S & C tags + * don't exist (untagged of both) + */ + rule_p = &priv->fs.vlan.untagged_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + break; + case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: + rule_p = &priv->fs.vlan.any_cvlan_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); + break; + case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: + rule_p = &priv->fs.vlan.any_svlan_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: + rule_p = &priv->fs.vlan.active_svlans_rule[vid]; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); + break; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */ + rule_p = &priv->fs.vlan.active_cvlans_rule[vid]; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); + break; + } + + if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type)) + return 0; + + *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + + if (IS_ERR(*rule_p)) { + err = PTR_ERR(*rule_p); + *rule_p = NULL; + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } + + return err; +} + +static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID) + mlx5e_vport_context_update_vlans(priv); + + err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec); + + kvfree(spec); + + return err; +} + +static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + if (priv->fs.vlan.untagged_rule) { + mlx5_del_flow_rules(priv->fs.vlan.untagged_rule); + priv->fs.vlan.untagged_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: + if (priv->fs.vlan.any_cvlan_rule) { + mlx5_del_flow_rules(priv->fs.vlan.any_cvlan_rule); + priv->fs.vlan.any_cvlan_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: + if (priv->fs.vlan.any_svlan_rule) { + mlx5_del_flow_rules(priv->fs.vlan.any_svlan_rule); + priv->fs.vlan.any_svlan_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: + if (priv->fs.vlan.active_svlans_rule[vid]) { + mlx5_del_flow_rules(priv->fs.vlan.active_svlans_rule[vid]); + priv->fs.vlan.active_svlans_rule[vid] = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID: + if (priv->fs.vlan.active_cvlans_rule[vid]) { + mlx5_del_flow_rules(priv->fs.vlan.active_cvlans_rule[vid]); + priv->fs.vlan.active_cvlans_rule[vid] = NULL; + } + mlx5e_vport_context_update_vlans(priv); + break; + } +} + +static void mlx5e_del_any_vid_rules(struct mlx5e_priv *priv) +{ + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); +} + +static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + if (err) + return err; + + return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); +} + +void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv) +{ + if (!priv->fs.vlan.cvlan_filter_disabled) + return; + + priv->fs.vlan.cvlan_filter_disabled = false; + if (priv->netdev->flags & IFF_PROMISC) + return; + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); +} + +void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv) +{ + if (priv->fs.vlan.cvlan_filter_disabled) + return; + + priv->fs.vlan.cvlan_filter_disabled = true; + if (priv->netdev->flags & IFF_PROMISC) + return; + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); +} + +static int mlx5e_vlan_rx_add_cvid(struct mlx5e_priv *priv, u16 vid) +{ + int err; + + set_bit(vid, priv->fs.vlan.active_cvlans); + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + if (err) + clear_bit(vid, priv->fs.vlan.active_cvlans); + + return err; +} + +static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid) +{ + struct net_device *netdev = priv->netdev; + int err; + + set_bit(vid, priv->fs.vlan.active_svlans); + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + if (err) { + clear_bit(vid, priv->fs.vlan.active_svlans); + return err; + } + + /* Need to fix some features.. */ + netdev_update_features(netdev); + return err; +} + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (be16_to_cpu(proto) == ETH_P_8021Q) + return mlx5e_vlan_rx_add_cvid(priv, vid); + else if (be16_to_cpu(proto) == ETH_P_8021AD) + return mlx5e_vlan_rx_add_svid(priv, vid); + + return -EOPNOTSUPP; +} + +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (be16_to_cpu(proto) == ETH_P_8021Q) { + clear_bit(vid, priv->fs.vlan.active_cvlans); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + } else if (be16_to_cpu(proto) == ETH_P_8021AD) { + clear_bit(vid, priv->fs.vlan.active_svlans); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + netdev_update_features(dev); + } + + return 0; +} + +static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) { + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); + } + + for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + + if (priv->fs.vlan.cvlan_filter_disabled) + mlx5e_add_any_vid_rules(priv); +} + +static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) { + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); + } + + for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + + WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state))); + + /* must be called after DESTROY bit is set and + * set_rx_mode is called and flushed + */ + if (priv->fs.vlan.cvlan_filter_disabled) + mlx5e_del_any_vid_rules(priv); +} + +#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, + struct mlx5e_l2_hash_node *hn) +{ + u8 action = hn->action; + u8 mac_addr[ETH_ALEN]; + int l2_err = 0; + + ether_addr_copy(mac_addr, hn->ai.addr); + + switch (action) { + case MLX5E_ACTION_ADD: + mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH); + if (!is_multicast_ether_addr(mac_addr)) { + l2_err = mlx5_mpfs_add_mac(priv->mdev, mac_addr); + hn->mpfs = !l2_err; + } + hn->action = MLX5E_ACTION_NONE; + break; + + case MLX5E_ACTION_DEL: + if (!is_multicast_ether_addr(mac_addr) && hn->mpfs) + l2_err = mlx5_mpfs_del_mac(priv->mdev, mac_addr); + mlx5e_del_l2_flow_rule(priv, &hn->ai); + mlx5e_del_l2_from_hash(hn); + break; + } + + if (l2_err) + netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n", + action == MLX5E_ACTION_ADD ? "add" : "del", mac_addr, l2_err); +} + +static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(netdev); + + mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, + priv->netdev->dev_addr); + + netdev_for_each_uc_addr(ha, netdev) + mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, ha->addr); + + netdev_for_each_mc_addr(ha, netdev) + mlx5e_add_l2_to_hash(priv->fs.l2.netdev_mc, ha->addr); + + netif_addr_unlock_bh(netdev); +} + +static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, + u8 addr_array[][ETH_ALEN], int size) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct net_device *ndev = priv->netdev; + struct mlx5e_l2_hash_node *hn; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int i = 0; + int hi; + + addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc; + + if (is_uc) /* Make sure our own address is pushed first */ + ether_addr_copy(addr_array[i++], ndev->dev_addr); + else if (priv->fs.l2.broadcast_enabled) + ether_addr_copy(addr_array[i++], ndev->broadcast); + + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { + if (ether_addr_equal(ndev->dev_addr, hn->ai.addr)) + continue; + if (i >= size) + break; + ether_addr_copy(addr_array[i++], hn->ai.addr); + } +} + +static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, + int list_type) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct mlx5e_l2_hash_node *hn; + u8 (*addr_array)[ETH_ALEN] = NULL; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int max_size; + int size; + int err; + int hi; + + size = is_uc ? 0 : (priv->fs.l2.broadcast_enabled ? 1 : 0); + max_size = is_uc ? + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); + + addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc; + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) + size++; + + if (size > max_size) { + netdev_warn(priv->netdev, + "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", + is_uc ? "UC" : "MC", size, max_size); + size = max_size; + } + + if (size) { + addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!addr_array) { + err = -ENOMEM; + goto out; + } + mlx5e_fill_addr_array(priv, list_type, addr_array, size); + } + + err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size); +out: + if (err) + netdev_err(priv->netdev, + "Failed to modify vport %s list err(%d)\n", + is_uc ? "UC" : "MC", err); + kfree(addr_array); +} + +static void mlx5e_vport_context_update(struct mlx5e_priv *priv) +{ + struct mlx5e_l2_table *ea = &priv->fs.l2; + + mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC); + mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC); + mlx5_modify_nic_vport_promisc(priv->mdev, 0, + ea->allmulti_enabled, + ea->promisc_enabled); +} + +static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_l2_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i) + mlx5e_execute_l2_action(priv, hn); + + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i) + mlx5e_execute_l2_action(priv, hn); +} + +static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_l2_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i) + hn->action = MLX5E_ACTION_DEL; + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i) + hn->action = MLX5E_ACTION_DEL; + + if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state)) + mlx5e_sync_netdev_addr(priv); + + mlx5e_apply_netdev_addr(priv); +} + +void mlx5e_set_rx_mode_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + set_rx_mode_work); + + struct mlx5e_l2_table *ea = &priv->fs.l2; + struct net_device *ndev = priv->netdev; + + bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state); + bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC); + bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI); + bool broadcast_enabled = rx_mode_enable; + + bool enable_promisc = !ea->promisc_enabled && promisc_enabled; + bool disable_promisc = ea->promisc_enabled && !promisc_enabled; + bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled; + bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; + bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; + bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; + + if (enable_promisc) { + if (!priv->channels.params.vlan_strip_disable) + netdev_warn_once(ndev, + "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n"); + mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC); + if (!priv->fs.vlan.cvlan_filter_disabled) + mlx5e_add_any_vid_rules(priv); + } + if (enable_allmulti) + mlx5e_add_l2_flow_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); + if (enable_broadcast) + mlx5e_add_l2_flow_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); + + mlx5e_handle_netdev_addr(priv); + + if (disable_broadcast) + mlx5e_del_l2_flow_rule(priv, &ea->broadcast); + if (disable_allmulti) + mlx5e_del_l2_flow_rule(priv, &ea->allmulti); + if (disable_promisc) { + if (!priv->fs.vlan.cvlan_filter_disabled) + mlx5e_del_any_vid_rules(priv); + mlx5e_del_l2_flow_rule(priv, &ea->promisc); + } + + ea->promisc_enabled = promisc_enabled; + ea->allmulti_enabled = allmulti_enabled; + ea->broadcast_enabled = broadcast_enabled; + + mlx5e_vport_context_update(priv); +} + +static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft) +{ + int i; + + for (i = ft->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ft->g[i])) + mlx5_destroy_flow_group(ft->g[i]); + ft->g[i] = NULL; + } + ft->num_groups = 0; +} + +void mlx5e_init_l2_addr(struct mlx5e_priv *priv) +{ + ether_addr_copy(priv->fs.l2.broadcast.addr, priv->netdev->broadcast); +} + +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) +{ + mlx5e_destroy_groups(ft); + kfree(ft->g); + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; +} + +static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) +{ + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->rules[i])) { + mlx5_del_flow_rules(ttc->rules[i]); + ttc->rules[i] = NULL; + } + } + + for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { + mlx5_del_flow_rules(ttc->tunnel_rules[i]); + ttc->tunnel_rules[i] = NULL; + } + } +} + +struct mlx5e_etype_proto { + u16 etype; + u8 proto; +}; + +static struct mlx5e_etype_proto ttc_rules[] = { + [MLX5E_TT_IPV4_TCP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_TCP, + }, + [MLX5E_TT_IPV6_TCP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_TCP, + }, + [MLX5E_TT_IPV4_UDP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_UDP, + }, + [MLX5E_TT_IPV6_UDP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_UDP, + }, + [MLX5E_TT_IPV4_IPSEC_AH] = { + .etype = ETH_P_IP, + .proto = IPPROTO_AH, + }, + [MLX5E_TT_IPV6_IPSEC_AH] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_AH, + }, + [MLX5E_TT_IPV4_IPSEC_ESP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_ESP, + }, + [MLX5E_TT_IPV6_IPSEC_ESP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_ESP, + }, + [MLX5E_TT_IPV4] = { + .etype = ETH_P_IP, + .proto = 0, + }, + [MLX5E_TT_IPV6] = { + .etype = ETH_P_IPV6, + .proto = 0, + }, + [MLX5E_TT_ANY] = { + .etype = 0, + .proto = 0, + }, +}; + +static struct mlx5e_etype_proto ttc_tunnel_rules[] = { + [MLX5E_TT_IPV4_GRE] = { + .etype = ETH_P_IP, + .proto = IPPROTO_GRE, + }, + [MLX5E_TT_IPV6_GRE] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_GRE, + }, +}; + +static u8 mlx5e_etype_to_ipv(u16 ethertype) +{ + if (ethertype == ETH_P_IP) + return 4; + + if (ethertype == ETH_P_IPV6) + return 6; + + return 0; +} + +static struct mlx5_flow_handle * +mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, + u8 proto) +{ + int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); + } + + ipv = mlx5e_etype_to_ipv(etype); + if (match_ipv_outer && ipv) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); + } else if (etype) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv, + struct ttc_params *params, + struct mlx5e_ttc_table *ttc) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle **rules; + struct mlx5_flow_table *ft; + int tt; + int err; + + ft = ttc->ft.t; + rules = ttc->rules; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + if (tt == MLX5E_TT_ANY) + dest.tir_num = params->any_tt_tirn; + else + dest.tir_num = params->indir_tirn[tt]; + rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + + if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + rules = ttc->tunnel_rules; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = params->inner_ttc->ft.t; + for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { + rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, + ttc_tunnel_rules[tt].etype, + ttc_tunnel_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + + return 0; + +del_rules: + err = PTR_ERR(rules[tt]); + rules[tt] = NULL; + mlx5e_cleanup_ttc_rules(ttc); + return err; +} + +#define MLX5E_TTC_NUM_GROUPS 3 +#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT) +#define MLX5E_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ + MLX5E_TTC_GROUP2_SIZE +\ + MLX5E_TTC_GROUP3_SIZE) + +#define MLX5E_INNER_TTC_NUM_GROUPS 3 +#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3) +#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\ + MLX5E_INNER_TTC_GROUP2_SIZE +\ + MLX5E_INNER_TTC_GROUP3_SIZE) + +static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, + bool use_ipv) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &ttc->ft; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS, + sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + ft->g = NULL; + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + if (use_ipv) + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); + else + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static struct mlx5_flow_handle * +mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, u8 proto) +{ + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ipv = mlx5e_etype_to_ipv(etype); + if (etype && ipv) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); + } + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv, + struct ttc_params *params, + struct mlx5e_ttc_table *ttc) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle **rules; + struct mlx5_flow_table *ft; + int err; + int tt; + + ft = ttc->ft.t; + rules = ttc->rules; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + if (tt == MLX5E_TT_ANY) + dest.tir_num = params->any_tt_tirn; + else + dest.tir_num = params->indir_tirn[tt]; + + rules[tt] = mlx5e_generate_inner_ttc_rule(priv, ft, &dest, + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + + return 0; + +del_rules: + err = PTR_ERR(rules[tt]); + rules[tt] = NULL; + mlx5e_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &ttc->ft; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + ft->g = NULL; + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, + struct ttc_params *ttc_params) +{ + ttc_params->any_tt_tirn = priv->direct_tir[0].tirn; + ttc_params->inner_ttc = &priv->fs.inner_ttc; +} + +void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params) +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + + ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE; + ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_NIC_PRIO; +} + +void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params) + +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + + ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; + ft_attr->level = MLX5E_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_NIC_PRIO; +} + +int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc) +{ + struct mlx5e_flow_table *ft = &ttc->ft; + int err; + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + ft->t = mlx5_create_flow_table(priv->fs.ns, ¶ms->ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_inner_ttc_table_groups(ttc); + if (err) + goto err; + + err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc) +{ + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + mlx5e_cleanup_ttc_rules(ttc); + mlx5e_destroy_flow_table(&ttc->ft); +} + +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc) +{ + mlx5e_cleanup_ttc_rules(ttc); + mlx5e_destroy_flow_table(&ttc->ft); +} + +int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc) +{ + bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); + struct mlx5e_flow_table *ft = &ttc->ft; + int err; + + ft->t = mlx5_create_flow_table(priv->fs.ns, ¶ms->ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer); + if (err) + goto err; + + err = mlx5e_generate_ttc_table_rules(priv, params, ttc); + if (err) + goto err; + + return 0; +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai) +{ + if (!IS_ERR_OR_NULL(ai->rule)) { + mlx5_del_flow_rules(ai->rule); + ai->rule = NULL; + } +} + +static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai, int type) +{ + struct mlx5_flow_table *ft = priv->fs.l2.ft.t; + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_spec *spec; + int err = 0; + u8 *mc_dmac; + u8 *mv_dmac; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + mc_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dmac_47_16); + mv_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dmac_47_16); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.ttc.ft.t; + + switch (type) { + case MLX5E_FULLMATCH: + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + eth_broadcast_addr(mc_dmac); + ether_addr_copy(mv_dmac, ai->addr); + break; + + case MLX5E_ALLMULTI: + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + mc_dmac[0] = 0x01; + mv_dmac[0] = 0x01; + break; + + case MLX5E_PROMISC: + break; + } + + ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(ai->rule)) { + netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n", + __func__, mv_dmac); + err = PTR_ERR(ai->rule); + ai->rule = NULL; + } + + kvfree(spec); + + return err; +} + +#define MLX5E_NUM_L2_GROUPS 3 +#define MLX5E_L2_GROUP1_SIZE BIT(0) +#define MLX5E_L2_GROUP2_SIZE BIT(15) +#define MLX5E_L2_GROUP3_SIZE BIT(0) +#define MLX5E_L2_TABLE_SIZE (MLX5E_L2_GROUP1_SIZE +\ + MLX5E_L2_GROUP2_SIZE +\ + MLX5E_L2_GROUP3_SIZE) +static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &l2_table->ft; + int ix = 0; + u8 *mc_dmac; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_NUM_L2_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers.dmac_47_16); + /* Flow Group for promiscuous */ + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_L2_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + /* Flow Group for full match */ + eth_broadcast_addr(mc_dmac); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_L2_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + /* Flow Group for allmulti */ + eth_zero_addr(mc_dmac); + mc_dmac[0] = 0x01; + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_L2_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + kvfree(in); + return 0; + +err_destroy_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + kvfree(in); + kfree(ft->g); + + return err; +} + +static void mlx5e_destroy_l2_table(struct mlx5e_priv *priv) +{ + mlx5e_destroy_flow_table(&priv->fs.l2.ft); +} + +static int mlx5e_create_l2_table(struct mlx5e_priv *priv) +{ + struct mlx5e_l2_table *l2_table = &priv->fs.l2; + struct mlx5e_flow_table *ft = &l2_table->ft; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_L2_TABLE_SIZE; + ft_attr.level = MLX5E_L2_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_l2_table_groups(l2_table); + if (err) + goto err_destroy_flow_table; + + return 0; + +err_destroy_flow_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; +} + +#define MLX5E_NUM_VLAN_GROUPS 4 +#define MLX5E_VLAN_GROUP0_SIZE BIT(12) +#define MLX5E_VLAN_GROUP1_SIZE BIT(12) +#define MLX5E_VLAN_GROUP2_SIZE BIT(1) +#define MLX5E_VLAN_GROUP3_SIZE BIT(0) +#define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ + MLX5E_VLAN_GROUP1_SIZE +\ + MLX5E_VLAN_GROUP2_SIZE +\ + MLX5E_VLAN_GROUP3_SIZE) + +static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in, + int inlen) +{ + int err; + int ix = 0; + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP0_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + return 0; + +err_destroy_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + + return err; +} + +static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft) +{ + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + err = __mlx5e_create_vlan_table_groups(ft, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fs.vlan.ft; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE; + ft_attr.level = MLX5E_VLAN_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) { + err = -ENOMEM; + goto err_destroy_vlan_table; + } + + err = mlx5e_create_vlan_table_groups(ft); + if (err) + goto err_free_g; + + mlx5e_add_vlan_rules(priv); + + return 0; + +err_free_g: + kfree(ft->g); +err_destroy_vlan_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; +} + +static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) +{ + mlx5e_del_vlan_rules(priv); + mlx5e_destroy_flow_table(&priv->fs.vlan.ft); +} + +int mlx5e_create_flow_steering(struct mlx5e_priv *priv) +{ + struct ttc_params ttc_params = {}; + int tt, err; + + priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + + if (!priv->fs.ns) + return -EOPNOTSUPP; + + err = mlx5e_arfs_create_tables(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n", + err); + priv->netdev->hw_features &= ~NETIF_F_NTUPLE; + } + + mlx5e_set_ttc_basic_params(priv, &ttc_params); + mlx5e_set_inner_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; + + err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); + if (err) { + netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } + + mlx5e_set_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; + + err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); + if (err) { + netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", + err); + goto err_destroy_inner_ttc_table; + } + + err = mlx5e_create_l2_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create l2 table, err=%d\n", + err); + goto err_destroy_ttc_table; + } + + err = mlx5e_create_vlan_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create vlan table, err=%d\n", + err); + goto err_destroy_l2_table; + } + + mlx5e_ethtool_init_steering(priv); + + return 0; + +err_destroy_l2_table: + mlx5e_destroy_l2_table(priv); +err_destroy_ttc_table: + mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); +err_destroy_inner_ttc_table: + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); +err_destroy_arfs_tables: + mlx5e_arfs_destroy_tables(priv); + + return err; +} + +void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) +{ + mlx5e_destroy_vlan_table(priv); + mlx5e_destroy_l2_table(priv); + mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); + mlx5e_arfs_destroy_tables(priv); + mlx5e_ethtool_cleanup_steering(priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c new file mode 100644 index 000000000..41cde926c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -0,0 +1,844 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/fs.h> +#include "en.h" + +struct mlx5e_ethtool_rule { + struct list_head list; + struct ethtool_rx_flow_spec flow_spec; + struct mlx5_flow_handle *rule; + struct mlx5e_ethtool_table *eth_ft; +}; + +static void put_flow_table(struct mlx5e_ethtool_table *eth_ft) +{ + if (!--eth_ft->num_rules) { + mlx5_destroy_flow_table(eth_ft->ft); + eth_ft->ft = NULL; + } +} + +#define MLX5E_ETHTOOL_L3_L4_PRIO 0 +#define MLX5E_ETHTOOL_L2_PRIO (MLX5E_ETHTOOL_L3_L4_PRIO + ETHTOOL_NUM_L3_L4_FTS) +#define MLX5E_ETHTOOL_NUM_ENTRIES 64000 +#define MLX5E_ETHTOOL_NUM_GROUPS 10 +static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs, + int num_tuples) +{ + struct mlx5e_ethtool_table *eth_ft; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int max_tuples; + int table_size; + int prio; + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case TCP_V6_FLOW: + case UDP_V6_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = &priv->fs.ethtool.l3_l4_ft[prio]; + break; + case IP_USER_FLOW: + case IPV6_USER_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = &priv->fs.ethtool.l3_l4_ft[prio]; + break; + case ETHER_FLOW: + max_tuples = ETHTOOL_NUM_L2_FTS; + prio = max_tuples - num_tuples; + eth_ft = &priv->fs.ethtool.l2_ft[prio]; + prio += MLX5E_ETHTOOL_L2_PRIO; + break; + default: + return ERR_PTR(-EINVAL); + } + + eth_ft->num_rules++; + if (eth_ft->ft) + return eth_ft; + + ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_ETHTOOL); + if (!ns) + return ERR_PTR(-EOPNOTSUPP); + + table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev, + flow_table_properties_nic_receive.log_max_ft_size)), + MLX5E_ETHTOOL_NUM_ENTRIES); + ft = mlx5_create_auto_grouped_flow_table(ns, prio, + table_size, + MLX5E_ETHTOOL_NUM_GROUPS, 0, 0); + if (IS_ERR(ft)) + return (void *)ft; + + eth_ft->ft = ft; + return eth_ft; +} + +static void mask_spec(u8 *mask, u8 *val, size_t size) +{ + unsigned int i; + + for (i = 0; i < size; i++, mask++, val++) + *((u8 *)val) = *((u8 *)mask) & *((u8 *)val); +} + +#define MLX5E_FTE_SET(header_p, fld, v) \ + MLX5_SET(fte_match_set_lyr_2_4, header_p, fld, v) + +#define MLX5E_FTE_ADDR_OF(header_p, fld) \ + MLX5_ADDR_OF(fte_match_set_lyr_2_4, header_p, fld) + +static void +set_ip4(void *headers_c, void *headers_v, __be32 ip4src_m, + __be32 ip4src_v, __be32 ip4dst_m, __be32 ip4dst_v) +{ + if (ip4src_m) { + memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ip4src_v, sizeof(ip4src_v)); + memset(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), + 0xff, sizeof(ip4src_m)); + } + if (ip4dst_m) { + memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ip4dst_v, sizeof(ip4dst_v)); + memset(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + 0xff, sizeof(ip4dst_m)); + } + + MLX5E_FTE_SET(headers_c, ethertype, 0xffff); + MLX5E_FTE_SET(headers_v, ethertype, ETH_P_IP); +} + +static void +set_ip6(void *headers_c, void *headers_v, __be32 ip6src_m[4], + __be32 ip6src_v[4], __be32 ip6dst_m[4], __be32 ip6dst_v[4]) +{ + u8 ip6_sz = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6); + + if (!ipv6_addr_any((struct in6_addr *)ip6src_m)) { + memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), + ip6src_v, ip6_sz); + memcpy(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), + ip6src_m, ip6_sz); + } + if (!ipv6_addr_any((struct in6_addr *)ip6dst_m)) { + memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + ip6dst_v, ip6_sz); + memcpy(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + ip6dst_m, ip6_sz); + } + + MLX5E_FTE_SET(headers_c, ethertype, 0xffff); + MLX5E_FTE_SET(headers_v, ethertype, ETH_P_IPV6); +} + +static void +set_tcp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v, + __be16 pdst_m, __be16 pdst_v) +{ + if (psrc_m) { + MLX5E_FTE_SET(headers_c, tcp_sport, 0xffff); + MLX5E_FTE_SET(headers_v, tcp_sport, ntohs(psrc_v)); + } + if (pdst_m) { + MLX5E_FTE_SET(headers_c, tcp_dport, 0xffff); + MLX5E_FTE_SET(headers_v, tcp_dport, ntohs(pdst_v)); + } + + MLX5E_FTE_SET(headers_c, ip_protocol, 0xffff); + MLX5E_FTE_SET(headers_v, ip_protocol, IPPROTO_TCP); +} + +static void +set_udp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v, + __be16 pdst_m, __be16 pdst_v) +{ + if (psrc_m) { + MLX5E_FTE_SET(headers_c, udp_sport, 0xffff); + MLX5E_FTE_SET(headers_v, udp_sport, ntohs(psrc_v)); + } + + if (pdst_m) { + MLX5E_FTE_SET(headers_c, udp_dport, 0xffff); + MLX5E_FTE_SET(headers_v, udp_dport, ntohs(pdst_v)); + } + + MLX5E_FTE_SET(headers_c, ip_protocol, 0xffff); + MLX5E_FTE_SET(headers_v, ip_protocol, IPPROTO_UDP); +} + +static void +parse_tcp4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.tcp_ip4_spec; + struct ethtool_tcpip4_spec *l4_val = &fs->h_u.tcp_ip4_spec; + + set_ip4(headers_c, headers_v, l4_mask->ip4src, l4_val->ip4src, + l4_mask->ip4dst, l4_val->ip4dst); + + set_tcp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc, + l4_mask->pdst, l4_val->pdst); +} + +static void +parse_udp4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.udp_ip4_spec; + struct ethtool_tcpip4_spec *l4_val = &fs->h_u.udp_ip4_spec; + + set_ip4(headers_c, headers_v, l4_mask->ip4src, l4_val->ip4src, + l4_mask->ip4dst, l4_val->ip4dst); + + set_udp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc, + l4_mask->pdst, l4_val->pdst); +} + +static void +parse_ip4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_usrip4_spec *l3_mask = &fs->m_u.usr_ip4_spec; + struct ethtool_usrip4_spec *l3_val = &fs->h_u.usr_ip4_spec; + + set_ip4(headers_c, headers_v, l3_mask->ip4src, l3_val->ip4src, + l3_mask->ip4dst, l3_val->ip4dst); + + if (l3_mask->proto) { + MLX5E_FTE_SET(headers_c, ip_protocol, l3_mask->proto); + MLX5E_FTE_SET(headers_v, ip_protocol, l3_val->proto); + } +} + +static void +parse_ip6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_usrip6_spec *l3_mask = &fs->m_u.usr_ip6_spec; + struct ethtool_usrip6_spec *l3_val = &fs->h_u.usr_ip6_spec; + + set_ip6(headers_c, headers_v, l3_mask->ip6src, + l3_val->ip6src, l3_mask->ip6dst, l3_val->ip6dst); + + if (l3_mask->l4_proto) { + MLX5E_FTE_SET(headers_c, ip_protocol, l3_mask->l4_proto); + MLX5E_FTE_SET(headers_v, ip_protocol, l3_val->l4_proto); + } +} + +static void +parse_tcp6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.tcp_ip6_spec; + struct ethtool_tcpip6_spec *l4_val = &fs->h_u.tcp_ip6_spec; + + set_ip6(headers_c, headers_v, l4_mask->ip6src, + l4_val->ip6src, l4_mask->ip6dst, l4_val->ip6dst); + + set_tcp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc, + l4_mask->pdst, l4_val->pdst); +} + +static void +parse_udp6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.udp_ip6_spec; + struct ethtool_tcpip6_spec *l4_val = &fs->h_u.udp_ip6_spec; + + set_ip6(headers_c, headers_v, l4_mask->ip6src, + l4_val->ip6src, l4_mask->ip6dst, l4_val->ip6dst); + + set_udp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc, + l4_mask->pdst, l4_val->pdst); +} + +static void +parse_ether(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethhdr *eth_mask = &fs->m_u.ether_spec; + struct ethhdr *eth_val = &fs->h_u.ether_spec; + + mask_spec((u8 *)eth_mask, (u8 *)eth_val, sizeof(*eth_mask)); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, smac_47_16), eth_mask->h_source); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, smac_47_16), eth_val->h_source); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, dmac_47_16), eth_mask->h_dest); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, dmac_47_16), eth_val->h_dest); + MLX5E_FTE_SET(headers_c, ethertype, ntohs(eth_mask->h_proto)); + MLX5E_FTE_SET(headers_v, ethertype, ntohs(eth_val->h_proto)); +} + +static void +set_cvlan(void *headers_c, void *headers_v, __be16 vlan_tci) +{ + MLX5E_FTE_SET(headers_c, cvlan_tag, 1); + MLX5E_FTE_SET(headers_v, cvlan_tag, 1); + MLX5E_FTE_SET(headers_c, first_vid, 0xfff); + MLX5E_FTE_SET(headers_v, first_vid, ntohs(vlan_tci)); +} + +static void +set_dmac(void *headers_c, void *headers_v, + unsigned char m_dest[ETH_ALEN], unsigned char v_dest[ETH_ALEN]) +{ + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, dmac_47_16), m_dest); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, dmac_47_16), v_dest); +} + +static int set_flow_attrs(u32 *match_c, u32 *match_v, + struct ethtool_rx_flow_spec *fs) +{ + void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + + switch (flow_type) { + case TCP_V4_FLOW: + parse_tcp4(outer_headers_c, outer_headers_v, fs); + break; + case UDP_V4_FLOW: + parse_udp4(outer_headers_c, outer_headers_v, fs); + break; + case IP_USER_FLOW: + parse_ip4(outer_headers_c, outer_headers_v, fs); + break; + case TCP_V6_FLOW: + parse_tcp6(outer_headers_c, outer_headers_v, fs); + break; + case UDP_V6_FLOW: + parse_udp6(outer_headers_c, outer_headers_v, fs); + break; + case IPV6_USER_FLOW: + parse_ip6(outer_headers_c, outer_headers_v, fs); + break; + case ETHER_FLOW: + parse_ether(outer_headers_c, outer_headers_v, fs); + break; + default: + return -EINVAL; + } + + if ((fs->flow_type & FLOW_EXT) && + (fs->m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK))) + set_cvlan(outer_headers_c, outer_headers_v, fs->h_ext.vlan_tci); + + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) { + mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN); + set_dmac(outer_headers_c, outer_headers_v, fs->m_ext.h_dest, + fs->h_ext.h_dest); + } + + return 0; +} + +static void add_rule_to_list(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *rule) +{ + struct mlx5e_ethtool_rule *iter; + struct list_head *head = &priv->fs.ethtool.rules; + + list_for_each_entry(iter, &priv->fs.ethtool.rules, list) { + if (iter->flow_spec.location > rule->flow_spec.location) + break; + head = &iter->list; + } + priv->fs.ethtool.tot_num_rules++; + list_add(&rule->list, head); +} + +static bool outer_header_zero(u32 *match_criteria) +{ + int size = MLX5_FLD_SZ_BYTES(fte_match_param, outer_headers); + char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + + return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, + outer_headers_c + 1, + size - 1); +} + +static struct mlx5_flow_handle * +add_ethtool_flow_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct ethtool_rx_flow_spec *fs) +{ + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_spec *spec; + struct mlx5_flow_handle *rule; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + err = set_flow_attrs(spec->match_criteria, spec->match_value, + fs); + if (err) + goto free; + + if (fs->ring_cookie == RX_CLS_FLOW_DISC) { + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + } else { + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) { + err = -ENOMEM; + goto free; + } + + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + + spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); + flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n", + __func__, err); + goto free; + } +free: + kvfree(spec); + kfree(dst); + return err ? ERR_PTR(err) : rule; +} + +static void del_ethtool_rule(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *eth_rule) +{ + if (eth_rule->rule) + mlx5_del_flow_rules(eth_rule->rule); + list_del(ð_rule->list); + priv->fs.ethtool.tot_num_rules--; + put_flow_table(eth_rule->eth_ft); + kfree(eth_rule); +} + +static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *iter; + + list_for_each_entry(iter, &priv->fs.ethtool.rules, list) { + if (iter->flow_spec.location == location) + return iter; + } + return NULL; +} + +static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + + eth_rule = find_ethtool_rule(priv, location); + if (eth_rule) + del_ethtool_rule(priv, eth_rule); + + eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL); + if (!eth_rule) + return ERR_PTR(-ENOMEM); + + add_rule_to_list(priv, eth_rule); + return eth_rule; +} + +#define MAX_NUM_OF_ETHTOOL_RULES BIT(10) + +#define all_ones(field) (field == (__force typeof(field))-1) +#define all_zeros_or_all_ones(field) \ + ((field) == 0 || (field) == (__force typeof(field))-1) + +static int validate_ethter(struct ethtool_rx_flow_spec *fs) +{ + struct ethhdr *eth_mask = &fs->m_u.ether_spec; + int ntuples = 0; + + if (!is_zero_ether_addr(eth_mask->h_dest)) + ntuples++; + if (!is_zero_ether_addr(eth_mask->h_source)) + ntuples++; + if (eth_mask->h_proto) + ntuples++; + return ntuples; +} + +static int validate_tcpudp4(struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.tcp_ip4_spec; + int ntuples = 0; + + if (l4_mask->tos) + return -EINVAL; + + if (l4_mask->ip4src) { + if (!all_ones(l4_mask->ip4src)) + return -EINVAL; + ntuples++; + } + if (l4_mask->ip4dst) { + if (!all_ones(l4_mask->ip4dst)) + return -EINVAL; + ntuples++; + } + if (l4_mask->psrc) { + if (!all_ones(l4_mask->psrc)) + return -EINVAL; + ntuples++; + } + if (l4_mask->pdst) { + if (!all_ones(l4_mask->pdst)) + return -EINVAL; + ntuples++; + } + /* Flow is TCP/UDP */ + return ++ntuples; +} + +static int validate_ip4(struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_usrip4_spec *l3_mask = &fs->m_u.usr_ip4_spec; + int ntuples = 0; + + if (l3_mask->l4_4_bytes || l3_mask->tos || + fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4) + return -EINVAL; + if (l3_mask->ip4src) { + if (!all_ones(l3_mask->ip4src)) + return -EINVAL; + ntuples++; + } + if (l3_mask->ip4dst) { + if (!all_ones(l3_mask->ip4dst)) + return -EINVAL; + ntuples++; + } + if (l3_mask->proto) + ntuples++; + /* Flow is IPv4 */ + return ++ntuples; +} + +static int validate_ip6(struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_usrip6_spec *l3_mask = &fs->m_u.usr_ip6_spec; + int ntuples = 0; + + if (l3_mask->l4_4_bytes || l3_mask->tclass) + return -EINVAL; + if (!ipv6_addr_any((struct in6_addr *)l3_mask->ip6src)) + ntuples++; + + if (!ipv6_addr_any((struct in6_addr *)l3_mask->ip6dst)) + ntuples++; + if (l3_mask->l4_proto) + ntuples++; + /* Flow is IPv6 */ + return ++ntuples; +} + +static int validate_tcpudp6(struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.tcp_ip6_spec; + int ntuples = 0; + + if (l4_mask->tclass) + return -EINVAL; + + if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6src)) + ntuples++; + + if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6dst)) + ntuples++; + + if (l4_mask->psrc) { + if (!all_ones(l4_mask->psrc)) + return -EINVAL; + ntuples++; + } + if (l4_mask->pdst) { + if (!all_ones(l4_mask->pdst)) + return -EINVAL; + ntuples++; + } + /* Flow is TCP/UDP */ + return ++ntuples; +} + +static int validate_vlan(struct ethtool_rx_flow_spec *fs) +{ + if (fs->m_ext.vlan_etype || + fs->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK)) + return -EINVAL; + + if (fs->m_ext.vlan_tci && + (be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID)) + return -EINVAL; + + return 1; +} + +static int validate_flow(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs) +{ + int num_tuples = 0; + int ret = 0; + + if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) + return -ENOSPC; + + if (fs->ring_cookie >= priv->channels.params.num_channels && + fs->ring_cookie != RX_CLS_FLOW_DISC) + return -EINVAL; + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + case ETHER_FLOW: + num_tuples += validate_ethter(fs); + break; + case TCP_V4_FLOW: + case UDP_V4_FLOW: + ret = validate_tcpudp4(fs); + if (ret < 0) + return ret; + num_tuples += ret; + break; + case IP_USER_FLOW: + ret = validate_ip4(fs); + if (ret < 0) + return ret; + num_tuples += ret; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + ret = validate_tcpudp6(fs); + if (ret < 0) + return ret; + num_tuples += ret; + break; + case IPV6_USER_FLOW: + ret = validate_ip6(fs); + if (ret < 0) + return ret; + num_tuples += ret; + break; + default: + return -ENOTSUPP; + } + if ((fs->flow_type & FLOW_EXT)) { + ret = validate_vlan(fs); + if (ret < 0) + return ret; + num_tuples += ret; + } + + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) + num_tuples++; + + return num_tuples; +} + +static int +mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs) +{ + struct mlx5e_ethtool_table *eth_ft; + struct mlx5e_ethtool_rule *eth_rule; + struct mlx5_flow_handle *rule; + int num_tuples; + int err; + + num_tuples = validate_flow(priv, fs); + if (num_tuples <= 0) { + netdev_warn(priv->netdev, "%s: flow is not valid %d\n", + __func__, num_tuples); + return num_tuples; + } + + eth_ft = get_flow_table(priv, fs, num_tuples); + if (IS_ERR(eth_ft)) + return PTR_ERR(eth_ft); + + eth_rule = get_ethtool_rule(priv, fs->location); + if (IS_ERR(eth_rule)) { + put_flow_table(eth_ft); + return PTR_ERR(eth_rule); + } + + eth_rule->flow_spec = *fs; + eth_rule->eth_ft = eth_ft; + if (!eth_ft->ft) { + err = -EINVAL; + goto del_ethtool_rule; + } + rule = add_ethtool_flow_rule(priv, eth_ft->ft, fs); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto del_ethtool_rule; + } + + eth_rule->rule = rule; + + return 0; + +del_ethtool_rule: + del_ethtool_rule(priv, eth_rule); + + return err; +} + +static int +mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + int err = 0; + + if (location >= MAX_NUM_OF_ETHTOOL_RULES) + return -ENOSPC; + + eth_rule = find_ethtool_rule(priv, location); + if (!eth_rule) { + err = -ENOENT; + goto out; + } + + del_ethtool_rule(priv, eth_rule); +out: + return err; +} + +static int +mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + + if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES) + return -EINVAL; + + list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) { + if (eth_rule->flow_spec.location == location) { + info->fs = eth_rule->flow_spec; + return 0; + } + } + + return -ENOENT; +} + +static int +mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs) +{ + int location = 0; + int idx = 0; + int err = 0; + + info->data = MAX_NUM_OF_ETHTOOL_RULES; + while ((!err || err == -ENOENT) && idx < info->rule_cnt) { + err = mlx5e_ethtool_get_flow(priv, info, location); + if (!err) + rule_locs[idx++] = location; + location++; + } + return err; +} + +void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) +{ + struct mlx5e_ethtool_rule *iter; + struct mlx5e_ethtool_rule *temp; + + list_for_each_entry_safe(iter, temp, &priv->fs.ethtool.rules, list) + del_ethtool_rule(priv, iter); +} + +void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) +{ + INIT_LIST_HEAD(&priv->fs.ethtool.rules); +} + +int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int err = 0; + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = mlx5e_ethtool_flow_replace(priv, &cmd->fs); + break; + case ETHTOOL_SRXCLSRLDEL: + err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +int mlx5e_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *info, u32 *rule_locs) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err = 0; + + switch (info->cmd) { + case ETHTOOL_GRXRINGS: + info->data = priv->channels.params.num_channels; + break; + case ETHTOOL_GRXCLSRLCNT: + info->rule_cnt = priv->fs.ethtool.tot_num_rules; + break; + case ETHTOOL_GRXCLSRULE: + err = mlx5e_ethtool_get_flow(priv, info, info->fs.location); + break; + case ETHTOOL_GRXCLSRLALL: + err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c new file mode 100644 index 000000000..6ecb92f55 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -0,0 +1,5221 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <net/tc_act/tc_gact.h> +#include <net/pkt_cls.h> +#include <linux/mlx5/fs.h> +#include <net/vxlan.h> +#include <linux/bpf.h> +#include <net/page_pool.h> +#include "eswitch.h" +#include "en.h" +#include "en_tc.h" +#include "en_rep.h" +#include "en_accel/ipsec.h" +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/tls.h" +#include "accel/ipsec.h" +#include "accel/tls.h" +#include "lib/vxlan.h" +#include "lib/clock.h" +#include "en/port.h" +#include "en/xdp.h" + +struct mlx5e_rq_param { + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; + struct mlx5e_rq_frags_info frags_info; +}; + +struct mlx5e_sq_param { + u32 sqc[MLX5_ST_SZ_DW(sqc)]; + struct mlx5_wq_param wq; +}; + +struct mlx5e_cq_param { + u32 cqc[MLX5_ST_SZ_DW(cqc)]; + struct mlx5_wq_param wq; + u16 eq_ix; + u8 cq_period_mode; +}; + +struct mlx5e_channel_param { + struct mlx5e_rq_param rq; + struct mlx5e_sq_param sq; + struct mlx5e_sq_param xdp_sq; + struct mlx5e_sq_param icosq; + struct mlx5e_cq_param rx_cq; + struct mlx5e_cq_param tx_cq; + struct mlx5e_cq_param icosq_cq; +}; + +bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +{ + bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && + MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); + u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq); + bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap; + + if (!striding_rq_umr) + return false; + if (!inline_umr) { + mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n", + (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap); + return false; + } + return true; +} + +static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params) +{ + u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + u16 linear_rq_headroom = params->xdp_prog ? + XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; + u32 frag_sz; + + linear_rq_headroom += NET_IP_ALIGN; + + frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu); + + if (params->xdp_prog && frag_sz < PAGE_SIZE) + frag_sz = PAGE_SIZE; + + return frag_sz; +} + +static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params) +{ + u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params); + + return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); +} + +static bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params); + + return !params->lro_en && frag_sz <= PAGE_SIZE; +} + +#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \ + MLX5_MPWQE_LOG_STRIDE_SZ_BASE) +static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params); + s8 signed_log_num_strides_param; + u8 log_num_strides; + + if (!mlx5e_rx_is_linear_skb(mdev, params)) + return false; + + if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) + return false; + + if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) + return true; + + log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz); + signed_log_num_strides_param = + (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE; + + return signed_log_num_strides_param >= 0; +} + +static u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params) +{ + if (params->log_rq_mtu_frames < + mlx5e_mpwqe_log_pkts_per_wqe(params) + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW) + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; + + return params->log_rq_mtu_frames - mlx5e_mpwqe_log_pkts_per_wqe(params); +} + +static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) + return order_base_2(mlx5e_rx_get_linear_frag_sz(params)); + + return MLX5E_MPWQE_STRIDE_SZ(mdev, + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); +} + +static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return MLX5_MPWRQ_LOG_WQE_SZ - + mlx5e_mpwqe_get_log_stride_size(mdev, params); +} + +static u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + u16 linear_rq_headroom = params->xdp_prog ? + XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; + bool is_linear_skb; + + linear_rq_headroom += NET_IP_ALIGN; + + is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? + mlx5e_rx_is_linear_skb(mdev, params) : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params); + + return is_linear_skb ? linear_rq_headroom : 0; +} + +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + params->log_rq_mtu_frames = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + + mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? + BIT(mlx5e_mpwqe_get_log_rq_size(params)) : + BIT(params->log_rq_mtu_frames), + BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)), + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); +} + +bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return mlx5e_check_fragmented_striding_rq_cap(mdev) && + !MLX5_IPSEC_DEV(mdev) && + !(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params)); +} + +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) && + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_CYCLIC; +} + +static void mlx5e_update_carrier(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 port_state; + + port_state = mlx5_query_vport_state(mdev, + MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, + 0); + + if (port_state == VPORT_STATE_UP) { + netdev_info(priv->netdev, "Link up\n"); + netif_carrier_on(priv->netdev); + } else { + netdev_info(priv->netdev, "Link down\n"); + netif_carrier_off(priv->netdev); + } +} + +static void mlx5e_update_carrier_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + update_carrier_work); + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (priv->profile->update_carrier) + priv->profile->update_carrier(priv); + mutex_unlock(&priv->state_lock); +} + +void mlx5e_update_stats(struct mlx5e_priv *priv) +{ + int i; + + for (i = mlx5e_num_stats_grps - 1; i >= 0; i--) + if (mlx5e_stats_grps[i].update_stats) + mlx5e_stats_grps[i].update_stats(priv); +} + +static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) +{ + int i; + + for (i = mlx5e_num_stats_grps - 1; i >= 0; i--) + if (mlx5e_stats_grps[i].update_stats_mask & + MLX5E_NDO_UPDATE_STATS) + mlx5e_stats_grps[i].update_stats(priv); +} + +void mlx5e_update_stats_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv, + update_stats_work); + + mutex_lock(&priv->state_lock); + priv->profile->update_stats(priv); + mutex_unlock(&priv->state_lock); +} + +static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, + enum mlx5_dev_event event, unsigned long param) +{ + struct mlx5e_priv *priv = vpriv; + + if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) + return; + + switch (event) { + case MLX5_DEV_EVENT_PORT_UP: + case MLX5_DEV_EVENT_PORT_DOWN: + queue_work(priv->wq, &priv->update_carrier_work); + break; + default: + break; + } +} + +static void mlx5e_enable_async_events(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); +} + +static void mlx5e_disable_async_events(struct mlx5e_priv *priv) +{ + clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); + synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC)); +} + +static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, + struct mlx5e_icosq *sq, + struct mlx5e_umr_wqe *wqe) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS); + + cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + ds_cnt); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + cseg->imm = rq->mkey_be; + + ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; + ucseg->xlt_octowords = + cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); +} + +static u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_size(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_size(&rq->wqe.wq); + } +} + +static u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return rq->mpwqe.wq.cur_sz; + default: + return rq->wqe.wq.cur_sz; + } +} + +static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, + struct mlx5e_channel *c) +{ + int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); + + rq->mpwqe.info = kvzalloc_node(array_size(wq_sz, + sizeof(*rq->mpwqe.info)), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->mpwqe.info) + return -ENOMEM; + + mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe); + + return 0; +} + +static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, + u64 npages, u8 page_shift, + struct mlx5_core_mkey *umr_mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); + + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn); + MLX5_SET64(mkc, mkc, len, npages << page_shift); + MLX5_SET(mkc, mkc, translations_octword_size, + MLX5_MTT_OCTW(npages)); + MLX5_SET(mkc, mkc, log_page_size, page_shift); + + err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) +{ + u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq)); + + return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey); +} + +static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix) +{ + return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT; +} + +static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) +{ + struct mlx5e_wqe_frag_info next_frag = {}; + struct mlx5e_wqe_frag_info *prev = NULL; + int i; + + next_frag.di = &rq->wqe.di[0]; + + for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { + struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; + struct mlx5e_wqe_frag_info *frag = + &rq->wqe.frags[i << rq->wqe.info.log_num_frags]; + int f; + + for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) { + if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) { + next_frag.di++; + next_frag.offset = 0; + if (prev) + prev->last_in_page = true; + } + *frag = next_frag; + + /* prepare next */ + next_frag.offset += frag_info[f].frag_stride; + prev = frag; + } + } + + if (prev) + prev->last_in_page = true; +} + +static int mlx5e_init_di_list(struct mlx5e_rq *rq, + struct mlx5e_params *params, + int wq_sz, int cpu) +{ + int len = wq_sz << rq->wqe.info.log_num_frags; + + rq->wqe.di = kvzalloc_node(array_size(len, sizeof(*rq->wqe.di)), + GFP_KERNEL, cpu_to_node(cpu)); + if (!rq->wqe.di) + return -ENOMEM; + + mlx5e_init_frags_partition(rq); + + return 0; +} + +static void mlx5e_free_di_list(struct mlx5e_rq *rq) +{ + kvfree(rq->wqe.di); +} + +static int mlx5e_alloc_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_rq_param *rqp, + struct mlx5e_rq *rq) +{ + struct page_pool_params pp_params = { 0 }; + struct mlx5_core_dev *mdev = c->mdev; + void *rqc = rqp->rqc; + void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 pool_size; + int wq_sz; + int err; + int i; + + rqp->wq.db_numa_node = cpu_to_node(c->cpu); + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; + rq->channel = c; + rq->ix = c->ix; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->stats = &c->priv->channel_stats[c->ix].rq; + + rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL; + if (IS_ERR(rq->xdp_prog)) { + err = PTR_ERR(rq->xdp_prog); + rq->xdp_prog = NULL; + goto err_rq_wq_destroy; + } + + err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix); + if (err < 0) + goto err_rq_wq_destroy; + + rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params); + pool_size = 1 << params->log_rq_mtu_frames; + + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, + &rq->wq_ctrl); + if (err) + goto err_rq_wq_destroy; + + rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR]; + + wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); + + pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params); + + rq->post_wqes = mlx5e_post_rx_mpwqes; + rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; + + rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe; +#ifdef CONFIG_MLX5_EN_IPSEC + if (MLX5_IPSEC_DEV(mdev)) { + err = -EINVAL; + netdev_err(c->netdev, "MPWQE RQ with IPSec offload not supported\n"); + goto err_rq_wq_destroy; + } +#endif + if (!rq->handle_rx_cqe) { + err = -EINVAL; + netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err); + goto err_rq_wq_destroy; + } + + rq->mpwqe.skb_from_cqe_mpwrq = + mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ? + mlx5e_skb_from_cqe_mpwrq_linear : + mlx5e_skb_from_cqe_mpwrq_nonlinear; + rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params); + rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params)); + + err = mlx5e_create_rq_umr_mkey(mdev, rq); + if (err) + goto err_rq_wq_destroy; + rq->mkey_be = cpu_to_be32(rq->umr_mkey.key); + + err = mlx5e_rq_alloc_mpwqe_info(rq, c); + if (err) + goto err_free; + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, + &rq->wq_ctrl); + if (err) + goto err_rq_wq_destroy; + + rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; + + wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); + + rq->wqe.info = rqp->frags_info; + rq->wqe.frags = + kvzalloc_node(array_size(sizeof(*rq->wqe.frags), + (wq_sz << rq->wqe.info.log_num_frags)), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->wqe.frags) { + err = -ENOMEM; + goto err_free; + } + + err = mlx5e_init_di_list(rq, params, wq_sz, c->cpu); + if (err) + goto err_free; + rq->post_wqes = mlx5e_post_rx_wqes; + rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; + +#ifdef CONFIG_MLX5_EN_IPSEC + if (c->priv->ipsec) + rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe; + else +#endif + rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe; + if (!rq->handle_rx_cqe) { + err = -EINVAL; + netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err); + goto err_free; + } + + rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(mdev, params) ? + mlx5e_skb_from_cqe_linear : + mlx5e_skb_from_cqe_nonlinear; + rq->mkey_be = c->mkey_be; + } + + /* Create a page_pool and register it with rxq */ + pp_params.order = 0; + pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ + pp_params.pool_size = pool_size; + pp_params.nid = cpu_to_node(c->cpu); + pp_params.dev = c->pdev; + pp_params.dma_dir = rq->buff.map_dir; + + /* page_pool can be used even when there is no rq->xdp_prog, + * given page_pool does not handle DMA mapping there is no + * required state to clear. And page_pool gracefully handle + * elevated refcnt. + */ + rq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->page_pool)) { + err = PTR_ERR(rq->page_pool); + rq->page_pool = NULL; + goto err_free; + } + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_POOL, rq->page_pool); + if (err) + goto err_free; + + for (i = 0; i < wq_sz; i++) { + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + struct mlx5e_rx_wqe_ll *wqe = + mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i); + u32 byte_count = + rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; + u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i); + + wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom); + wqe->data[0].byte_count = cpu_to_be32(byte_count); + wqe->data[0].lkey = rq->mkey_be; + } else { + struct mlx5e_rx_wqe_cyc *wqe = + mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i); + int f; + + for (f = 0; f < rq->wqe.info.num_frags; f++) { + u32 frag_size = rq->wqe.info.arr[f].frag_size | + MLX5_HW_START_PADDING; + + wqe->data[f].byte_count = cpu_to_be32(frag_size); + wqe->data[f].lkey = rq->mkey_be; + } + /* check if num_frags is not a pow of two */ + if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) { + wqe->data[f].byte_count = 0; + wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + wqe->data[f].addr = 0; + } + } + } + + INIT_WORK(&rq->dim.work, mlx5e_rx_dim_work); + + switch (params->rx_cq_moderation.cq_period_mode) { + case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: + rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; + break; + case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: + default: + rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } + + rq->page_cache.head = 0; + rq->page_cache.tail = 0; + + return 0; + +err_free: + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + kvfree(rq->mpwqe.info); + mlx5_core_destroy_mkey(mdev, &rq->umr_mkey); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + kvfree(rq->wqe.frags); + mlx5e_free_di_list(rq); + } + +err_rq_wq_destroy: + if (rq->xdp_prog) + bpf_prog_put(rq->xdp_prog); + xdp_rxq_info_unreg(&rq->xdp_rxq); + if (rq->page_pool) + page_pool_destroy(rq->page_pool); + mlx5_wq_destroy(&rq->wq_ctrl); + + return err; +} + +static void mlx5e_free_rq(struct mlx5e_rq *rq) +{ + int i; + + if (rq->xdp_prog) + bpf_prog_put(rq->xdp_prog); + + xdp_rxq_info_unreg(&rq->xdp_rxq); + if (rq->page_pool) + page_pool_destroy(rq->page_pool); + + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + kvfree(rq->mpwqe.info); + mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + kvfree(rq->wqe.frags); + mlx5e_free_di_list(rq); + } + + for (i = rq->page_cache.head; i != rq->page_cache.tail; + i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { + struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i]; + + mlx5e_page_release(rq, dma_info, false); + } + mlx5_wq_destroy(&rq->wq_ctrl); +} + +static int mlx5e_create_rq(struct mlx5e_rq *rq, + struct mlx5e_rq_param *param) +{ + struct mlx5_core_dev *mdev = rq->mdev; + + void *in; + void *rqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * rq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + + memcpy(rqc, param->rqc, sizeof(param->rqc)); + + MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); + + mlx5_fill_page_frag_array(&rq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, + int next_state) +{ + struct mlx5_core_dev *mdev = rq->mdev; + + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, curr_state); + MLX5_SET(rqc, rqc, state, next_state); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); + MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS); + MLX5_SET(rqc, rqc, scatter_fcs, enable); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5_core_dev *mdev = c->mdev; + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); + MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD); + MLX5_SET(rqc, rqc, vsd, vsd); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + +static void mlx5e_destroy_rq(struct mlx5e_rq *rq) +{ + mlx5_core_destroy_rq(rq->mdev, rq->rqn); +} + +static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time); + struct mlx5e_channel *c = rq->channel; + + u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq)); + + do { + if (mlx5e_rqwq_get_cur_sz(rq) >= min_wqes) + return 0; + + msleep(20); + } while (time_before(jiffies, exp_time)); + + netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", + c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes); + + return -ETIMEDOUT; +} + +static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) +{ + __be16 wqe_ix_be; + u16 wqe_ix; + + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + + /* UMR WQE (if in progress) is always at wq->head */ + if (rq->mpwqe.umr_in_progress) + rq->dealloc_wqe(rq, wq->head); + + while (!mlx5_wq_ll_is_empty(wq)) { + struct mlx5e_rx_wqe_ll *wqe; + + wqe_ix_be = *wq->tail_next; + wqe_ix = be16_to_cpu(wqe_ix_be); + wqe = mlx5_wq_ll_get_wqe(wq, wqe_ix); + rq->dealloc_wqe(rq, wqe_ix); + mlx5_wq_ll_pop(wq, wqe_ix_be, + &wqe->next.next_wqe_index); + } + } else { + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + + while (!mlx5_wq_cyc_is_empty(wq)) { + wqe_ix = mlx5_wq_cyc_get_tail(wq); + rq->dealloc_wqe(rq, wqe_ix); + mlx5_wq_cyc_pop(wq); + } + } + +} + +static int mlx5e_open_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_rq_param *param, + struct mlx5e_rq *rq) +{ + int err; + + err = mlx5e_alloc_rq(c, params, param, rq); + if (err) + return err; + + err = mlx5e_create_rq(rq, param); + if (err) + goto err_free_rq; + + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) + goto err_destroy_rq; + + if (params->rx_dim_enabled) + __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + + /* We disable csum_complete when XDP is enabled since + * XDP programs might manipulate packets which will render + * skb->checksum incorrect. + */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + + return 0; + +err_destroy_rq: + mlx5e_destroy_rq(rq); +err_free_rq: + mlx5e_free_rq(rq); + + return err; +} + +static void mlx5e_activate_rq(struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *sq = &rq->channel->icosq; + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_wqe *nopwqe; + + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl); +} + +static void mlx5e_deactivate_rq(struct mlx5e_rq *rq) +{ + clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); + napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ +} + +static void mlx5e_close_rq(struct mlx5e_rq *rq) +{ + cancel_work_sync(&rq->dim.work); + mlx5e_destroy_rq(rq); + mlx5e_free_rx_descs(rq); + mlx5e_free_rq(rq); +} + +static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq) +{ + kvfree(sq->db.xdpi); +} + +static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->db.xdpi = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.xdpi)), + GFP_KERNEL, numa); + if (!sq->db.xdpi) { + mlx5e_free_xdpsq_db(sq); + return -ENOMEM; + } + + return 0; +} + +static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_xdpsq *sq, + bool is_redirect) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_wq_cyc *wq = &sq->wq; + int err; + + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; + sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + sq->stats = is_redirect ? + &c->priv->channel_stats[c->ix].xdpsq : + &c->priv->channel_stats[c->ix].rq_xdpsq; + + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq) +{ + mlx5e_free_xdpsq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq) +{ + kvfree(sq->db.ico_wqe); +} + +static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa) +{ + u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->db.ico_wqe = kvzalloc_node(array_size(wq_sz, + sizeof(*sq->db.ico_wqe)), + GFP_KERNEL, numa); + if (!sq->db.ico_wqe) + return -ENOMEM; + + return 0; +} + +static int mlx5e_alloc_icosq(struct mlx5e_channel *c, + struct mlx5e_sq_param *param, + struct mlx5e_icosq *sq) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_wq_cyc *wq = &sq->wq; + int err; + + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.bfreg.map; + + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +static void mlx5e_free_icosq(struct mlx5e_icosq *sq) +{ + mlx5e_free_icosq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq) +{ + kvfree(sq->db.wqe_info); + kvfree(sq->db.dma_fifo); +} + +static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + sq->db.dma_fifo = kvzalloc_node(array_size(df_sz, + sizeof(*sq->db.dma_fifo)), + GFP_KERNEL, numa); + sq->db.wqe_info = kvzalloc_node(array_size(wq_sz, + sizeof(*sq->db.wqe_info)), + GFP_KERNEL, numa); + if (!sq->db.dma_fifo || !sq->db.wqe_info) { + mlx5e_free_txqsq_db(sq); + return -ENOMEM; + } + + sq->dma_fifo_mask = df_sz - 1; + + return 0; +} + +static void mlx5e_sq_recover(struct work_struct *work); +static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, + int txq_ix, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, + int tc) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_wq_cyc *wq = &sq->wq; + int err; + + sq->pdev = c->pdev; + sq->tstamp = c->tstamp; + sq->clock = &mdev->clock; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->txq_ix = txq_ix; + sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; + sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; + INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover); + if (MLX5_IPSEC_DEV(c->priv->mdev)) + set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); + if (mlx5_accel_is_tls_device(c->priv->mdev)) + set_bit(MLX5E_SQ_STATE_TLS, &sq->state); + + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work); + sq->dim.mode = params->tx_cq_moderation.cq_period_mode; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +static void mlx5e_free_txqsq(struct mlx5e_txqsq *sq) +{ + mlx5e_free_txqsq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +struct mlx5e_create_sq_param { + struct mlx5_wq_ctrl *wq_ctrl; + u32 cqn; + u32 tisn; + u8 tis_lst_sz; + u8 min_inline_mode; +}; + +static int mlx5e_create_sq(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u32 *sqn) +{ + void *in; + void *sqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * csp->wq_ctrl->buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + memcpy(sqc, param->sqc, sizeof(param->sqc)); + MLX5_SET(sqc, sqc, tis_lst_sz, csp->tis_lst_sz); + MLX5_SET(sqc, sqc, tis_num_0, csp->tisn); + MLX5_SET(sqc, sqc, cqn, csp->cqn); + + if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) + MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode); + + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index); + MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); + + mlx5_fill_page_frag_array(&csp->wq_ctrl->buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_sq(mdev, in, inlen, sqn); + + kvfree(in); + + return err; +} + +struct mlx5e_modify_sq_param { + int curr_state; + int next_state; + bool rl_update; + int rl_index; +}; + +static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p) +{ + void *in; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_sq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + + MLX5_SET(modify_sq_in, in, sq_state, p->curr_state); + MLX5_SET(sqc, sqc, state, p->next_state); + if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) { + MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); + } + + err = mlx5_core_modify_sq(mdev, sqn, in, inlen); + + kvfree(in); + + return err; +} + +static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) +{ + mlx5_core_destroy_sq(mdev, sqn); +} + +static int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u32 *sqn) +{ + struct mlx5e_modify_sq_param msp = {0}; + int err; + + err = mlx5e_create_sq(mdev, param, csp, sqn); + if (err) + return err; + + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + err = mlx5e_modify_sq(mdev, *sqn, &msp); + if (err) + mlx5e_destroy_sq(mdev, *sqn); + + return err; +} + +static int mlx5e_set_sq_maxrate(struct net_device *dev, + struct mlx5e_txqsq *sq, u32 rate); + +static int mlx5e_open_txqsq(struct mlx5e_channel *c, + u32 tisn, + int txq_ix, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, + int tc) +{ + struct mlx5e_create_sq_param csp = {}; + u32 tx_rate; + int err; + + err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc); + if (err) + return err; + + csp.tisn = tisn; + csp.tis_lst_sz = 1; + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = sq->min_inline_mode; + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + if (err) + goto err_free_txqsq; + + tx_rate = c->priv->tx_rates[sq->txq_ix]; + if (tx_rate) + mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate); + + if (params->tx_dim_enabled) + sq->state |= BIT(MLX5E_SQ_STATE_AM); + + return 0; + +err_free_txqsq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + mlx5e_free_txqsq(sq); + + return err; +} + +static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) +{ + WARN_ONCE(sq->cc != sq->pc, + "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", + sq->sqn, sq->cc, sq->pc); + sq->cc = 0; + sq->dma_fifo_cc = 0; + sq->pc = 0; +} + +static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) +{ + sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); +} + +static inline void netif_tx_disable_queue(struct netdev_queue *txq) +{ + __netif_tx_lock_bh(txq); + netif_tx_stop_queue(txq); + __netif_tx_unlock_bh(txq); +} + +static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5_wq_cyc *wq = &sq->wq; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + /* prevent netif_tx_wake_queue */ + napi_synchronize(&c->napi); + + netif_tx_disable_queue(sq->txq); + + /* last doorbell out, godspeed .. */ + if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_tx_wqe *nop; + + sq->db.wqe_info[pi].skb = NULL; + nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl); + } +} + +static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_rate_limit rl = {0}; + + cancel_work_sync(&sq->dim.work); + mlx5e_destroy_sq(mdev, sq->sqn); + if (sq->rate_limit) { + rl.rate = sq->rate_limit; + mlx5_rl_remove_rate(mdev, &rl); + } + mlx5e_free_txqsq_descs(sq); + mlx5e_free_txqsq(sq); +} + +static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + + while (time_before(jiffies, exp_time)) { + if (sq->cc == sq->pc) + return 0; + + msleep(20); + } + + netdev_err(sq->channel->netdev, + "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", + sq->sqn, sq->cc, sq->pc); + + return -ETIMEDOUT; +} + +static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + struct mlx5e_modify_sq_param msp = {0}; + int err; + + msp.curr_state = curr_state; + msp.next_state = MLX5_SQC_STATE_RST; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); + return err; + } + + memset(&msp, 0, sizeof(msp)); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); + return err; + } + + return 0; +} + +static void mlx5e_sq_recover(struct work_struct *work) +{ + struct mlx5e_txqsq_recover *recover = + container_of(work, struct mlx5e_txqsq_recover, + recover_work); + struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq, + recover); + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + u8 state; + int err; + + err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", + sq->sqn, err); + return; + } + + if (state != MLX5_RQC_STATE_ERR) { + netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); + return; + } + + netif_tx_disable_queue(sq->txq); + + if (mlx5e_wait_for_sq_flush(sq)) + return; + + /* If the interval between two consecutive recovers per SQ is too + * short, don't recover to avoid infinite loop of ERR_CQE -> recover. + * If we reached this state, there is probably a bug that needs to be + * fixed. let's keep the queue close and let tx timeout cleanup. + */ + if (jiffies_to_msecs(jiffies - recover->last_recover) < + MLX5E_SQ_RECOVER_MIN_INTERVAL) { + netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n", + sq->sqn); + return; + } + + /* At this point, no new packets will arrive from the stack as TXQ is + * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all + * pending WQEs. SQ can safely reset the SQ. + */ + if (mlx5e_sq_to_ready(sq, state)) + return; + + mlx5e_reset_txqsq_cc_pc(sq); + sq->stats->recover++; + recover->last_recover = jiffies; + mlx5e_activate_txqsq(sq); +} + +static int mlx5e_open_icosq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_icosq *sq) +{ + struct mlx5e_create_sq_param csp = {}; + int err; + + err = mlx5e_alloc_icosq(c, param, sq); + if (err) + return err; + + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = params->tx_min_inline_mode; + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + if (err) + goto err_free_icosq; + + return 0; + +err_free_icosq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + mlx5e_free_icosq(sq); + + return err; +} + +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = sq->channel; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + napi_synchronize(&c->napi); + + mlx5e_destroy_sq(c->mdev, sq->sqn); + mlx5e_free_icosq(sq); +} + +static int mlx5e_open_xdpsq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_xdpsq *sq, + bool is_redirect) +{ + unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT; + struct mlx5e_create_sq_param csp = {}; + unsigned int inline_hdr_sz = 0; + int err; + int i; + + err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect); + if (err) + return err; + + csp.tis_lst_sz = 1; + csp.tisn = c->priv->tisn[0]; /* tc = 0 */ + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = sq->min_inline_mode; + if (is_redirect) + set_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state); + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + if (err) + goto err_free_xdpsq; + + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + ds_cnt++; + } + + /* Pre initialize fixed WQE fields */ + for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; + + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + + dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); + dseg->lkey = sq->mkey_be; + } + + return 0; + +err_free_xdpsq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + mlx5e_free_xdpsq(sq); + + return err; +} + +static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_channel *c = sq->channel; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + napi_synchronize(&c->napi); + + mlx5e_destroy_sq(c->mdev, sq->sqn); + mlx5e_free_xdpsq_descs(sq); + mlx5e_free_xdpsq(sq); +} + +static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) +{ + struct mlx5_core_cq *mcq = &cq->mcq; + int eqn_not_used; + unsigned int irqn; + int err; + u32 i; + + err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); + if (err) + return err; + + err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, + &cq->wq_ctrl); + if (err) + return err; + + mcq->cqe_sz = 64; + mcq->set_ci_db = cq->wq_ctrl.db.db; + mcq->arm_db = cq->wq_ctrl.db.db + 1; + *mcq->set_ci_db = 0; + *mcq->arm_db = 0; + mcq->vector = param->eq_ix; + mcq->comp = mlx5e_completion_event; + mcq->event = mlx5e_cq_error_event; + mcq->irqn = irqn; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + + cqe->op_own = 0xf1; + } + + cq->mdev = mdev; + + return 0; +} + +static int mlx5e_alloc_cq(struct mlx5e_channel *c, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) +{ + struct mlx5_core_dev *mdev = c->priv->mdev; + int err; + + param->wq.buf_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = cpu_to_node(c->cpu); + param->eq_ix = c->ix; + + err = mlx5e_alloc_cq_common(mdev, param, cq); + + cq->napi = &c->napi; + cq->channel = c; + + return err; +} + +static void mlx5e_free_cq(struct mlx5e_cq *cq) +{ + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) +{ + struct mlx5_core_dev *mdev = cq->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + + void *in; + void *cqc; + int inlen; + unsigned int irqn_not_used; + int eqn; + int err; + + err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + + memcpy(cqc, param->cqc, sizeof(param->cqc)); + + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); + + MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + err = mlx5_core_create_cq(mdev, mcq, in, inlen); + + kvfree(in); + + if (err) + return err; + + mlx5e_cq_arm(cq); + + return 0; +} + +static void mlx5e_destroy_cq(struct mlx5e_cq *cq) +{ + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); +} + +static int mlx5e_open_cq(struct mlx5e_channel *c, + struct net_dim_cq_moder moder, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) +{ + struct mlx5_core_dev *mdev = c->mdev; + int err; + + err = mlx5e_alloc_cq(c, param, cq); + if (err) + return err; + + err = mlx5e_create_cq(cq, param); + if (err) + goto err_free_cq; + + if (MLX5_CAP_GEN(mdev, cq_moderation)) + mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts); + return 0; + +err_free_cq: + mlx5e_free_cq(cq); + + return err; +} + +static void mlx5e_close_cq(struct mlx5e_cq *cq) +{ + mlx5e_destroy_cq(cq); + mlx5e_free_cq(cq); +} + +static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) +{ + return cpumask_first(priv->mdev->priv.irq_info[ix + MLX5_EQ_VEC_COMP_BASE].mask); +} + +static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) +{ + int err; + int tc; + + for (tc = 0; tc < c->num_tc; tc++) { + err = mlx5e_open_cq(c, params->tx_cq_moderation, + &cparam->tx_cq, &c->sq[tc].cq); + if (err) + goto err_close_tx_cqs; + } + + return 0; + +err_close_tx_cqs: + for (tc--; tc >= 0; tc--) + mlx5e_close_cq(&c->sq[tc].cq); + + return err; +} + +static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_cq(&c->sq[tc].cq); +} + +static int mlx5e_open_sqs(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) +{ + struct mlx5e_priv *priv = c->priv; + int err, tc, max_nch = priv->profile->max_nch(priv->mdev); + + for (tc = 0; tc < params->num_tc; tc++) { + int txq_ix = c->ix + tc * max_nch; + + err = mlx5e_open_txqsq(c, c->priv->tisn[tc], txq_ix, + params, &cparam->sq, &c->sq[tc], tc); + if (err) + goto err_close_sqs; + } + + return 0; + +err_close_sqs: + for (tc--; tc >= 0; tc--) + mlx5e_close_txqsq(&c->sq[tc]); + + return err; +} + +static void mlx5e_close_sqs(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_txqsq(&c->sq[tc]); +} + +static int mlx5e_set_sq_maxrate(struct net_device *dev, + struct mlx5e_txqsq *sq, u32 rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_modify_sq_param msp = {0}; + struct mlx5_rate_limit rl = {0}; + u16 rl_index = 0; + int err; + + if (rate == sq->rate_limit) + /* nothing to do */ + return 0; + + if (sq->rate_limit) { + rl.rate = sq->rate_limit; + /* remove current rl index to free space to next ones */ + mlx5_rl_remove_rate(mdev, &rl); + } + + sq->rate_limit = 0; + + if (rate) { + rl.rate = rate; + err = mlx5_rl_add_rate(mdev, &rl_index, &rl); + if (err) { + netdev_err(dev, "Failed configuring rate %u: %d\n", + rate, err); + return err; + } + } + + msp.curr_state = MLX5_SQC_STATE_RDY; + msp.next_state = MLX5_SQC_STATE_RDY; + msp.rl_index = rl_index; + msp.rl_update = true; + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed configuring rate %u: %d\n", + rate, err); + /* remove the rate from the table */ + if (rate) + mlx5_rl_remove_rate(mdev, &rl); + return err; + } + + sq->rate_limit = rate; + return 0; +} + +static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_txqsq *sq = priv->txq2sq[index]; + int err = 0; + + if (!mlx5_rl_is_supported(mdev)) { + netdev_err(dev, "Rate limiting is not supported on this device\n"); + return -EINVAL; + } + + /* rate is given in Mb/sec, HW config is in Kb/sec */ + rate = rate << 10; + + /* Check whether rate in valid range, 0 is always valid */ + if (rate && !mlx5_rl_is_in_range(mdev, rate)) { + netdev_err(dev, "TX rate %u, is not in range\n", rate); + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + err = mlx5e_set_sq_maxrate(dev, sq, rate); + if (!err) + priv->tx_rates[index] = rate; + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam, + struct mlx5e_channel **cp) +{ + struct net_dim_cq_moder icocq_moder = {0, 0}; + struct net_device *netdev = priv->netdev; + int cpu = mlx5e_get_cpu(priv, ix); + struct mlx5e_channel *c; + unsigned int irq; + int err; + int eqn; + + err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + if (err) + return err; + + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + if (!c) + return -ENOMEM; + + c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; + c->ix = ix; + c->cpu = cpu; + c->pdev = &priv->mdev->pdev->dev; + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + c->num_tc = params->num_tc; + c->xdp = !!params->xdp_prog; + c->stats = &priv->channel_stats[ix].ch; + + c->irq_desc = irq_to_desc(irq); + + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); + + err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); + if (err) + goto err_napi_del; + + err = mlx5e_open_tx_cqs(c, params, cparam); + if (err) + goto err_close_icosq_cq; + + err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xdpsq.cq); + if (err) + goto err_close_tx_cqs; + + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq); + if (err) + goto err_close_xdp_tx_cqs; + + /* XDP SQ CQ params are same as normal TXQ sq CQ params */ + err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation, + &cparam->tx_cq, &c->rq.xdpsq.cq) : 0; + if (err) + goto err_close_rx_cq; + + napi_enable(&c->napi); + + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); + if (err) + goto err_disable_napi; + + err = mlx5e_open_sqs(c, params, cparam); + if (err) + goto err_close_icosq; + + err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq, false) : 0; + if (err) + goto err_close_sqs; + + err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq); + if (err) + goto err_close_xdp_sq; + + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->xdpsq, true); + if (err) + goto err_close_rq; + + *cp = c; + + return 0; + +err_close_rq: + mlx5e_close_rq(&c->rq); + +err_close_xdp_sq: + if (c->xdp) + mlx5e_close_xdpsq(&c->rq.xdpsq); + +err_close_sqs: + mlx5e_close_sqs(c); + +err_close_icosq: + mlx5e_close_icosq(&c->icosq); + +err_disable_napi: + napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->rq.xdpsq.cq); + +err_close_rx_cq: + mlx5e_close_cq(&c->rq.cq); + +err_close_xdp_tx_cqs: + mlx5e_close_cq(&c->xdpsq.cq); + +err_close_tx_cqs: + mlx5e_close_tx_cqs(c); + +err_close_icosq_cq: + mlx5e_close_cq(&c->icosq.cq); + +err_napi_del: + netif_napi_del(&c->napi); + kvfree(c); + + return err; +} + +static void mlx5e_activate_channel(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_activate_txqsq(&c->sq[tc]); + mlx5e_activate_rq(&c->rq); + netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); +} + +static void mlx5e_deactivate_channel(struct mlx5e_channel *c) +{ + int tc; + + mlx5e_deactivate_rq(&c->rq); + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_deactivate_txqsq(&c->sq[tc]); +} + +static void mlx5e_close_channel(struct mlx5e_channel *c) +{ + mlx5e_close_xdpsq(&c->xdpsq); + mlx5e_close_rq(&c->rq); + if (c->xdp) + mlx5e_close_xdpsq(&c->rq.xdpsq); + mlx5e_close_sqs(c); + mlx5e_close_icosq(&c->icosq); + napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->rq.xdpsq.cq); + mlx5e_close_cq(&c->rq.cq); + mlx5e_close_cq(&c->xdpsq.cq); + mlx5e_close_tx_cqs(c); + mlx5e_close_cq(&c->icosq.cq); + netif_napi_del(&c->napi); + + kvfree(c); +} + +#define DEFAULT_FRAG_SIZE (2048) + +static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_frags_info *info) +{ + u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); + int frag_size_max = DEFAULT_FRAG_SIZE; + u32 buf_size = 0; + int i; + +#ifdef CONFIG_MLX5_EN_IPSEC + if (MLX5_IPSEC_DEV(mdev)) + byte_count += MLX5E_METADATA_ETHER_LEN; +#endif + + if (mlx5e_rx_is_linear_skb(mdev, params)) { + int frag_stride; + + frag_stride = mlx5e_rx_get_linear_frag_sz(params); + frag_stride = roundup_pow_of_two(frag_stride); + + info->arr[0].frag_size = byte_count; + info->arr[0].frag_stride = frag_stride; + info->num_frags = 1; + info->wqe_bulk = PAGE_SIZE / frag_stride; + goto out; + } + + if (byte_count > PAGE_SIZE + + (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max) + frag_size_max = PAGE_SIZE; + + i = 0; + while (buf_size < byte_count) { + int frag_size = byte_count - buf_size; + + if (i < MLX5E_MAX_RX_FRAGS - 1) + frag_size = min(frag_size, frag_size_max); + + info->arr[i].frag_size = frag_size; + info->arr[i].frag_stride = roundup_pow_of_two(frag_size); + + buf_size += frag_size; + i++; + } + info->num_frags = i; + /* number of different wqes sharing a page */ + info->wqe_bulk = 1 + (info->num_frags % 2); + +out: + info->wqe_bulk = max_t(u8, info->wqe_bulk, 8); + info->log_num_frags = order_base_2(info->num_frags); +} + +static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) +{ + int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs; + + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + sz += sizeof(struct mlx5e_rx_wqe_ll); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + sz += sizeof(struct mlx5e_rx_wqe_cyc); + } + + return order_base_2(sz); +} + +static void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_rq_param *param) +{ + struct mlx5_core_dev *mdev = priv->mdev; + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + int ndsegs = 1; + + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + MLX5_SET(wq, wq, log_wqe_num_of_strides, + mlx5e_mpwqe_get_log_num_strides(mdev, params) - + MLX5_MPWQE_LOG_NUM_STRIDES_BASE); + MLX5_SET(wq, wq, log_wqe_stride_size, + mlx5e_mpwqe_get_log_stride_size(mdev, params) - + MLX5_MPWQE_LOG_STRIDE_SZ_BASE); + MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params)); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); + mlx5e_build_rq_frags_info(mdev, params, ¶m->frags_info); + ndsegs = param->frags_info.num_frags; + } + + MLX5_SET(wq, wq, wq_type, params->rq_wq_type); + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, log_wq_stride, + mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn); + MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); + MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); + MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); + + param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); +} + +static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, + struct mlx5e_rq_param *param) +{ + struct mlx5_core_dev *mdev = priv->mdev; + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, log_wq_stride, + mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); + MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter); + + param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); +} + +static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); + + param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); +} + +static void mlx5e_build_sq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev)); +} + +static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); +} + +static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_cq_param *param) +{ + struct mlx5_core_dev *mdev = priv->mdev; + void *cqc = param->cqc; + u8 log_cq_size; + + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) + + mlx5e_mpwqe_get_log_num_strides(mdev, params); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + log_cq_size = params->log_rq_mtu_frames; + } + + MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM); + MLX5_SET(cqc, cqc, cqe_comp_en, 1); + } + + mlx5e_build_common_cq_param(priv, param); + param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; +} + +static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); + + mlx5e_build_common_cq_param(priv, param); + param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; +} + +static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); + + mlx5e_build_common_cq_param(priv, param); + + param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); +} + +static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); +} + +static void mlx5e_build_channel_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) +{ + u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + + mlx5e_build_rq_param(priv, params, &cparam->rq); + mlx5e_build_sq_param(priv, params, &cparam->sq); + mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); + mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); + mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq); +} + +int mlx5e_open_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs) +{ + struct mlx5e_channel_param *cparam; + int err = -ENOMEM; + int i; + + chs->num = chs->params.num_channels; + + chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL); + cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL); + if (!chs->c || !cparam) + goto err_free; + + mlx5e_build_channel_param(priv, &chs->params, cparam); + for (i = 0; i < chs->num; i++) { + err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]); + if (err) + goto err_close_channels; + } + + kvfree(cparam); + return 0; + +err_close_channels: + for (i--; i >= 0; i--) + mlx5e_close_channel(chs->c[i]); + +err_free: + kfree(chs->c); + kvfree(cparam); + chs->num = 0; + return err; +} + +static void mlx5e_activate_channels(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_activate_channel(chs->c[i]); +} + +static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) +{ + int err = 0; + int i; + + for (i = 0; i < chs->num; i++) + err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, + err ? 0 : 20000); + + return err ? -ETIMEDOUT : 0; +} + +static void mlx5e_deactivate_channels(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_deactivate_channel(chs->c[i]); +} + +void mlx5e_close_channels(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_close_channel(chs->c[i]); + + kfree(chs->c); + chs->num = 0; +} + +static int +mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt) +{ + struct mlx5_core_dev *mdev = priv->mdev; + void *rqtc; + int inlen; + int err; + u32 *in; + int i; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + MLX5_SET(rqtc, rqtc, rqt_max_size, sz); + + for (i = 0; i < sz; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn); + + err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn); + if (!err) + rqt->enabled = true; + + kvfree(in); + return err; +} + +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) +{ + rqt->enabled = false; + mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); +} + +int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) +{ + struct mlx5e_rqt *rqt = &priv->indir_rqt; + int err; + + err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt); + if (err) + mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err); + return err; +} + +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) +{ + struct mlx5e_rqt *rqt; + int err; + int ix; + + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + rqt = &priv->direct_tir[ix].rqt; + err = mlx5e_create_rqt(priv, 1 /*size */, rqt); + if (err) + goto err_destroy_rqts; + } + + return 0; + +err_destroy_rqts: + mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err); + for (ix--; ix >= 0; ix--) + mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); + + return err; +} + +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) +{ + int i; + + for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); +} + +static int mlx5e_rx_hash_fn(int hfunc) +{ + return (hfunc == ETH_RSS_HASH_TOP) ? + MLX5_RX_HASH_FN_TOEPLITZ : + MLX5_RX_HASH_FN_INVERTED_XOR8; +} + +int mlx5e_bits_invert(unsigned long a, int size) +{ + int inv = 0; + int i; + + for (i = 0; i < size; i++) + inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i; + + return inv; +} + +static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz, + struct mlx5e_redirect_rqt_param rrp, void *rqtc) +{ + int i; + + for (i = 0; i < sz; i++) { + u32 rqn; + + if (rrp.is_rss) { + int ix = i; + + if (rrp.rss.hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(i, ilog2(sz)); + + ix = priv->channels.params.indirection_rqt[ix]; + rqn = rrp.rss.channels->c[ix]->rq.rqn; + } else { + rqn = rrp.rqn; + } + MLX5_SET(rqtc, rqtc, rq_num[i], rqn); + } +} + +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, + struct mlx5e_redirect_rqt_param rrp) +{ + struct mlx5_core_dev *mdev = priv->mdev; + void *rqtc; + int inlen; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); + mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc); + err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen); + + kvfree(in); + return err; +} + +static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix, + struct mlx5e_redirect_rqt_param rrp) +{ + if (!rrp.is_rss) + return rrp.rqn; + + if (ix >= rrp.rss.channels->num) + return priv->drop_rq.rqn; + + return rrp.rss.channels->c[ix]->rq.rqn; +} + +static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, + struct mlx5e_redirect_rqt_param rrp) +{ + u32 rqtn; + int ix; + + if (priv->indir_rqt.enabled) { + /* RSS RQ table */ + rqtn = priv->indir_rqt.rqtn; + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); + } + + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + struct mlx5e_redirect_rqt_param direct_rrp = { + .is_rss = false, + { + .rqn = mlx5e_get_direct_rqn(priv, ix, rrp) + }, + }; + + /* Direct RQ Tables */ + if (!priv->direct_tir[ix].rqt.enabled) + continue; + + rqtn = priv->direct_tir[ix].rqt.rqtn; + mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); + } +} + +static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs) +{ + struct mlx5e_redirect_rqt_param rrp = { + .is_rss = true, + { + .rss = { + .channels = chs, + .hfunc = chs->params.rss_hfunc, + } + }, + }; + + mlx5e_redirect_rqts(priv, rrp); +} + +static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) +{ + struct mlx5e_redirect_rqt_param drop_rrp = { + .is_rss = false, + { + .rqn = priv->drop_rq.rqn, + }, + }; + + mlx5e_redirect_rqts(priv, drop_rrp); +} + +static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) +{ + if (!params->lro_en) + return; + +#define ROUGH_MAX_L2_L3_HDR_SZ 256 + + MLX5_SET(tirc, tirc, lro_enable_mask, + MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | + MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); + MLX5_SET(tirc, tirc, lro_max_ip_payload_size, + (params->lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout); +} + +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, + enum mlx5e_traffic_types tt, + void *tirc, bool inner) +{ + void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) : + MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + +#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP) + +#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_L4_SPORT |\ + MLX5_HASH_FIELD_SEL_L4_DPORT) + +#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_IPSEC_SPI) + + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc)); + if (params->rss_hfunc == ETH_RSS_HASH_TOP) { + void *rss_key = MLX5_ADDR_OF(tirc, tirc, + rx_hash_toeplitz_key); + size_t len = MLX5_FLD_SZ_BYTES(tirc, + rx_hash_toeplitz_key); + + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + memcpy(rss_key, params->toeplitz_hash_key, len); + } + + switch (tt) { + case MLX5E_TT_IPV4_TCP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV6_TCP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV4_UDP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV6_UDP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV4_IPSEC_AH: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV6_IPSEC_AH: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV4_IPSEC_ESP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV6_IPSEC_ESP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV4: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP); + break; + + case MLX5E_TT_IPV6: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP); + break; + default: + WARN_ONCE(true, "%s: bad traffic type!\n", __func__); + } +} + +static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *tirc; + int inlen; + int err; + int tt; + int ix; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tir_in, in, bitmask.lro, 1); + tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); + + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, + inlen); + if (err) + goto free_in; + } + + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, + in, inlen); + if (err) + goto free_in; + } + +free_in: + kvfree(in); + + return err; +} + +static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, + enum mlx5e_traffic_types tt, + u32 *tirc) +{ + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); + MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1); + + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); +} + +static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u16 mtu) +{ + u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu); + int err; + + err = mlx5_set_port_mtu(mdev, hw_mtu, 1); + if (err) + return err; + + /* Update vport context MTU */ + mlx5_modify_nic_vport_mtu(mdev, hw_mtu); + return 0; +} + +static void mlx5e_query_mtu(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u16 *mtu) +{ + u16 hw_mtu = 0; + int err; + + err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); + if (err || !hw_mtu) /* fallback to port oper mtu */ + mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); + + *mtu = MLX5E_HW2SW_MTU(params, hw_mtu); +} + +static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) +{ + struct mlx5e_params *params = &priv->channels.params; + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 mtu; + int err; + + err = mlx5e_set_mtu(mdev, params, params->sw_mtu); + if (err) + return err; + + mlx5e_query_mtu(mdev, params, &mtu); + if (mtu != params->sw_mtu) + netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n", + __func__, mtu, params->sw_mtu); + + params->sw_mtu = mtu; + return 0; +} + +static void mlx5e_netdev_set_tcs(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int nch = priv->channels.params.num_channels; + int ntc = priv->channels.params.num_tc; + int tc; + + netdev_reset_tc(netdev); + + if (ntc == 1) + return; + + netdev_set_num_tc(netdev, ntc); + + /* Map netdev TCs to offset 0 + * We have our own UP to TXQ mapping for QoS + */ + for (tc = 0; tc < ntc; tc++) + netdev_set_tc_queue(netdev, tc, nch, 0); +} + +static void mlx5e_build_tc2txq_maps(struct mlx5e_priv *priv) +{ + int max_nch = priv->profile->max_nch(priv->mdev); + int i, tc; + + for (i = 0; i < max_nch; i++) + for (tc = 0; tc < priv->profile->max_tc; tc++) + priv->channel_tc2txq[i][tc] = i + tc * max_nch; +} + +static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv) +{ + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + int i, tc; + + for (i = 0; i < priv->channels.num; i++) { + c = priv->channels.c[i]; + for (tc = 0; tc < c->num_tc; tc++) { + sq = &c->sq[tc]; + priv->txq2sq[sq->txq_ix] = sq; + } + } +} + +void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) +{ + int num_txqs = priv->channels.num * priv->channels.params.num_tc; + struct net_device *netdev = priv->netdev; + + mlx5e_netdev_set_tcs(netdev); + netif_set_real_num_tx_queues(netdev, num_txqs); + netif_set_real_num_rx_queues(netdev, priv->channels.num); + + mlx5e_build_tx2sq_maps(priv); + mlx5e_activate_channels(&priv->channels); + mlx5e_xdp_tx_enable(priv); + netif_tx_start_all_queues(priv->netdev); + + if (MLX5_ESWITCH_MANAGER(priv->mdev)) + mlx5e_add_sqs_fwd_rules(priv); + + mlx5e_wait_channels_min_rx_wqes(&priv->channels); + mlx5e_redirect_rqts_to_channels(priv, &priv->channels); +} + +void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) +{ + mlx5e_redirect_rqts_to_drop(priv); + + if (MLX5_ESWITCH_MANAGER(priv->mdev)) + mlx5e_remove_sqs_fwd_rules(priv); + + /* FIXME: This is a W/A only for tx timeout watch dog false alarm when + * polling for inactive tx queues. + */ + netif_tx_stop_all_queues(priv->netdev); + netif_tx_disable(priv->netdev); + mlx5e_xdp_tx_disable(priv); + mlx5e_deactivate_channels(&priv->channels); +} + +void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify) +{ + struct net_device *netdev = priv->netdev; + int new_num_txqs; + int carrier_ok; + new_num_txqs = new_chs->num * new_chs->params.num_tc; + + carrier_ok = netif_carrier_ok(netdev); + netif_carrier_off(netdev); + + if (new_num_txqs < netdev->real_num_tx_queues) + netif_set_real_num_tx_queues(netdev, new_num_txqs); + + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + + priv->channels = *new_chs; + + /* New channels are ready to roll, modify HW settings if needed */ + if (hw_modify) + hw_modify(priv); + + mlx5e_refresh_tirs(priv, false); + mlx5e_activate_priv_channels(priv); + + /* return carrier back if needed */ + if (carrier_ok) + netif_carrier_on(netdev); +} + +void mlx5e_timestamp_init(struct mlx5e_priv *priv) +{ + priv->tstamp.tx_type = HWTSTAMP_TX_OFF; + priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE; +} + +int mlx5e_open_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + set_bit(MLX5E_STATE_OPENED, &priv->state); + + err = mlx5e_open_channels(priv, &priv->channels); + if (err) + goto err_clear_state_opened_flag; + + mlx5e_refresh_tirs(priv, false); + mlx5e_activate_priv_channels(priv); + if (priv->profile->update_carrier) + priv->profile->update_carrier(priv); + + if (priv->profile->update_stats) + queue_delayed_work(priv->wq, &priv->update_stats_work, 0); + + return 0; + +err_clear_state_opened_flag: + clear_bit(MLX5E_STATE_OPENED, &priv->state); + return err; +} + +int mlx5e_open(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_open_locked(netdev); + if (!err) + mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP); + mutex_unlock(&priv->state_lock); + + if (mlx5_vxlan_allowed(priv->mdev->vxlan)) + udp_tunnel_get_rx_info(netdev); + + return err; +} + +int mlx5e_close_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + /* May already be CLOSED in case a previous configuration operation + * (e.g RX/TX queue size change) that involves close&open failed. + */ + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + netif_carrier_off(priv->netdev); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + + return 0; +} + +int mlx5e_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + if (!netif_device_present(netdev)) + return -ENODEV; + + mutex_lock(&priv->state_lock); + mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN); + err = mlx5e_close_locked(netdev); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev, + struct mlx5e_rq *rq, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + int err; + + param->wq.db_numa_node = param->wq.buf_numa_node; + + err = mlx5_wq_cyc_create(mdev, ¶m->wq, rqc_wq, &rq->wqe.wq, + &rq->wq_ctrl); + if (err) + return err; + + /* Mark as unused given "Drop-RQ" packets never reach XDP */ + xdp_rxq_info_unused(&rq->xdp_rxq); + + rq->mdev = mdev; + + return 0; +} + +static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev, + struct mlx5e_cq *cq, + struct mlx5e_cq_param *param) +{ + param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); + param->wq.db_numa_node = dev_to_node(&mdev->pdev->dev); + + return mlx5e_alloc_cq_common(mdev, param, cq); +} + +static int mlx5e_open_drop_rq(struct mlx5e_priv *priv, + struct mlx5e_rq *drop_rq) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_cq_param cq_param = {}; + struct mlx5e_rq_param rq_param = {}; + struct mlx5e_cq *cq = &drop_rq->cq; + int err; + + mlx5e_build_drop_rq_param(priv, &rq_param); + + err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param); + if (err) + return err; + + err = mlx5e_create_cq(cq, &cq_param); + if (err) + goto err_free_cq; + + err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param); + if (err) + goto err_destroy_cq; + + err = mlx5e_create_rq(drop_rq, &rq_param); + if (err) + goto err_free_rq; + + err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) + mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err); + + return 0; + +err_free_rq: + mlx5e_free_rq(drop_rq); + +err_destroy_cq: + mlx5e_destroy_cq(cq); + +err_free_cq: + mlx5e_free_cq(cq); + + return err; +} + +static void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) +{ + mlx5e_destroy_rq(drop_rq); + mlx5e_free_rq(drop_rq); + mlx5e_destroy_cq(&drop_rq->cq); + mlx5e_free_cq(&drop_rq->cq); +} + +int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, + u32 underlay_qpn, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; + void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, prio, tc << 1); + MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); + + if (mlx5_lag_is_lacp_owner(mdev)) + MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); + + return mlx5_core_create_tis(mdev, in, sizeof(in), tisn); +} + +void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn) +{ + mlx5_core_destroy_tis(mdev, tisn); +} + +int mlx5e_create_tises(struct mlx5e_priv *priv) +{ + int err; + int tc; + + for (tc = 0; tc < priv->profile->max_tc; tc++) { + err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]); + if (err) + goto err_close_tises; + } + + return 0; + +err_close_tises: + for (tc--; tc >= 0; tc--) + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); + + return err; +} + +void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) +{ + int tc; + + for (tc = 0; tc < priv->profile->max_tc; tc++) + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); +} + +static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, + enum mlx5e_traffic_types tt, + u32 *tirc) +{ + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); +} + +static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) +{ + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, rqtn); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); +} + +int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) +{ + struct mlx5e_tir *tir; + void *tirc; + int inlen; + int i = 0; + int err; + u32 *in; + int tt; + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(in, 0, inlen); + tir = &priv->indir_tir[tt]; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_indir_tir_ctx(priv, tt, tirc); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); + if (err) { + mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_inner_tirs; + } + } + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + goto out; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { + memset(in, 0, inlen); + tir = &priv->inner_indir_tir[i]; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_inner_indir_tir_ctx(priv, i, tirc); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); + if (err) { + mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err); + goto err_destroy_inner_tirs; + } + } + +out: + kvfree(in); + + return 0; + +err_destroy_inner_tirs: + for (i--; i >= 0; i--) + mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); + + for (tt--; tt >= 0; tt--) + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); + + kvfree(in); + + return err; +} + +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) +{ + int nch = priv->profile->max_nch(priv->mdev); + struct mlx5e_tir *tir; + void *tirc; + int inlen; + int err; + u32 *in; + int ix; + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + for (ix = 0; ix < nch; ix++) { + memset(in, 0, inlen); + tir = &priv->direct_tir[ix]; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); + if (err) + goto err_destroy_ch_tirs; + } + + kvfree(in); + + return 0; + +err_destroy_ch_tirs: + mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err); + for (ix--; ix >= 0; ix--) + mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); + + kvfree(in); + + return err; +} + +void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) +{ + int i; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); +} + +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) +{ + int nch = priv->profile->max_nch(priv->mdev); + int i; + + for (i = 0; i < nch; i++) + mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); +} + +static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) +{ + int err = 0; + int i; + + for (i = 0; i < chs->num; i++) { + err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable); + if (err) + return err; + } + + return 0; +} + +static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) +{ + int err = 0; + int i; + + for (i = 0; i < chs->num; i++) { + err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd); + if (err) + return err; + } + + return 0; +} + +static int mlx5e_setup_tc_mqprio(struct net_device *netdev, + struct tc_mqprio_qopt *mqprio) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels new_channels = {}; + u8 tc = mqprio->num_tc; + int err = 0; + + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + + if (tc && tc != MLX5E_MAX_NUM_TC) + return -EINVAL; + + mutex_lock(&priv->state_lock); + + new_channels.params = priv->channels.params; + new_channels.params.num_tc = tc ? tc : 1; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + + priv->max_opened_tc = max_t(u8, priv->max_opened_tc, + new_channels.params.num_tc); + mlx5e_switch_priv_channels(priv, &new_channels, NULL); +out: + mutex_unlock(&priv->state_lock); + return err; +} + +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *cls_flower, + int flags) +{ + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, cls_flower, flags); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, cls_flower, flags); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, cls_flower, flags); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct mlx5e_priv *priv = cb_priv; + + if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb, + priv, priv, f->extack); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb, + priv); + return 0; + default: + return -EOPNOTSUPP; + } +} +#endif + +static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { +#ifdef CONFIG_MLX5_ESWITCH + case TC_SETUP_BLOCK: + return mlx5e_setup_tc_block(dev, type_data); +#endif + case TC_SETUP_QDISC_MQPRIO: + return mlx5e_setup_tc_mqprio(dev, type_data); + default: + return -EOPNOTSUPP; + } +} + +static void +mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_sw_stats *sstats = &priv->stats.sw; + struct mlx5e_vport_stats *vstats = &priv->stats.vport; + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + + /* update HW stats in background for next time */ + queue_delayed_work(priv->wq, &priv->update_stats_work, 0); + + if (mlx5e_is_uplink_rep(priv)) { + stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); + stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); + stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); + stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); + } else { + mlx5e_grp_sw_update_stats(priv); + stats->rx_packets = sstats->rx_packets; + stats->rx_bytes = sstats->rx_bytes; + stats->tx_packets = sstats->tx_packets; + stats->tx_bytes = sstats->tx_bytes; + stats->tx_dropped = sstats->tx_queue_dropped; + } + + stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer; + + stats->rx_length_errors = + PPORT_802_3_GET(pstats, a_in_range_length_errors) + + PPORT_802_3_GET(pstats, a_out_of_range_length_field) + + PPORT_802_3_GET(pstats, a_frame_too_long_errors); + stats->rx_crc_errors = + PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); + stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors); + stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards); + stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + + stats->rx_frame_errors; + stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; + + /* vport multicast also counts packets that are dropped due to steering + * or rx out of buffer + */ + stats->multicast = + VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); +} + +static void mlx5e_set_rx_mode(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + queue_work(priv->wq, &priv->set_rx_mode_work); +} + +static int mlx5e_set_mac(struct net_device *netdev, void *addr) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct sockaddr *saddr = addr; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + netif_addr_lock_bh(netdev); + ether_addr_copy(netdev->dev_addr, saddr->sa_data); + netif_addr_unlock_bh(netdev); + + queue_work(priv->wq, &priv->set_rx_mode_work); + + return 0; +} + +#define MLX5E_SET_FEATURE(features, feature, enable) \ + do { \ + if (enable) \ + *features |= feature; \ + else \ + *features &= ~feature; \ + } while (0) + +typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); + +static int set_feature_lro(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; + struct mlx5e_params *old_params; + int err = 0; + bool reset; + + mutex_lock(&priv->state_lock); + + old_params = &priv->channels.params; + if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { + netdev_warn(netdev, "can't set LRO with legacy RQ\n"); + err = -EINVAL; + goto out; + } + + reset = test_bit(MLX5E_STATE_OPENED, &priv->state); + + new_channels.params = *old_params; + new_channels.params.lro_en = enable; + + if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) == + mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params)) + reset = false; + } + + if (!reset) { + *old_params = new_channels.params; + err = mlx5e_modify_tirs_lro(priv); + goto out; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + + mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_modify_tirs_lro); +out: + mutex_unlock(&priv->state_lock); + return err; +} + +static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (enable) + mlx5e_enable_cvlan_filter(priv); + else + mlx5e_disable_cvlan_filter(priv); + + return 0; +} + +#ifdef CONFIG_MLX5_ESWITCH +static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (!enable && mlx5e_tc_num_filters(priv)) { + netdev_err(netdev, + "Active offloaded tc filters, can't turn hw_tc_offload off\n"); + return -EINVAL; + } + + return 0; +} +#endif + +static int set_feature_rx_all(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_set_port_fcs(mdev, !enable); +} + +static int set_feature_rx_fcs(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + + priv->channels.params.scatter_fcs_en = enable; + err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable); + if (err) + priv->channels.params.scatter_fcs_en = !enable; + + mutex_unlock(&priv->state_lock); + + return err; +} + +static int set_feature_rx_vlan(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + + mutex_lock(&priv->state_lock); + + priv->channels.params.vlan_strip_disable = !enable; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = mlx5e_modify_channels_vsd(&priv->channels, !enable); + if (err) + priv->channels.params.vlan_strip_disable = enable; + +unlock: + mutex_unlock(&priv->state_lock); + + return err; +} + +#ifdef CONFIG_MLX5_EN_ARFS +static int set_feature_arfs(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + if (enable) + err = mlx5e_arfs_enable(priv); + else + err = mlx5e_arfs_disable(priv); + + return err; +} +#endif + +static int mlx5e_handle_feature(struct net_device *netdev, + netdev_features_t *features, + netdev_features_t feature, + mlx5e_feature_handler feature_handler) +{ + netdev_features_t changes = *features ^ netdev->features; + bool enable = !!(*features & feature); + int err; + + if (!(changes & feature)) + return 0; + + err = feature_handler(netdev, enable); + if (err) { + MLX5E_SET_FEATURE(features, feature, !enable); + netdev_err(netdev, "%s feature %pNF failed, err %d\n", + enable ? "Enable" : "Disable", &feature, err); + return err; + } + + return 0; +} + +static int mlx5e_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t oper_features = features; + int err = 0; + +#define MLX5E_HANDLE_FEATURE(feature, handler) \ + mlx5e_handle_feature(netdev, &oper_features, feature, handler) + + err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, + set_feature_cvlan_filter); +#ifdef CONFIG_MLX5_ESWITCH + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters); +#endif + err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); +#ifdef CONFIG_MLX5_EN_ARFS + err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs); +#endif + + if (err) { + netdev->features = oper_features; + return -EINVAL; + } + + return 0; +} + +static netdev_features_t mlx5e_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_params *params; + + mutex_lock(&priv->state_lock); + params = &priv->channels.params; + if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) { + /* HW strips the outer C-tag header, this is a problem + * for S-tag traffic. + */ + features &= ~NETIF_F_HW_VLAN_CTAG_RX; + if (!params->vlan_strip_disable) + netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); + } + if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) { + features &= ~NETIF_F_LRO; + if (params->lro_en) + netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); + } + + if (params->xdp_prog) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "LRO is incompatible with XDP\n"); + features &= ~NETIF_F_LRO; + } + } + + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + features &= ~NETIF_F_RXHASH; + if (netdev->features & NETIF_F_RXHASH) + netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); + } + + mutex_unlock(&priv->state_lock); + + return features; +} + +int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, + change_hw_mtu_cb set_mtu_cb) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels new_channels = {}; + struct mlx5e_params *params; + int err = 0; + bool reset; + + mutex_lock(&priv->state_lock); + + params = &priv->channels.params; + + reset = !params->lro_en; + reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state); + + new_channels.params = *params; + new_channels.params.sw_mtu = new_mtu; + + if (params->xdp_prog && + !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { + netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", + new_mtu, mlx5e_xdp_max_mtu(params)); + err = -EINVAL; + goto out; + } + + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params); + u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params); + u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params); + + reset = reset && (is_linear || (ppw_old != ppw_new)); + } + + if (!reset) { + params->sw_mtu = new_mtu; + if (set_mtu_cb) + set_mtu_cb(priv); + netdev->mtu = params->sw_mtu; + goto out; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + + mlx5e_switch_priv_channels(priv, &new_channels, set_mtu_cb); + netdev->mtu = new_channels.params.sw_mtu; + +out: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu) +{ + return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu); +} + +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || + (mlx5_clock_get_ptp_index(priv->mdev) == -1)) + return -EOPNOTSUPP; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* TX HW timestamp */ + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + /* RX HW timestamp */ + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + /* Reset CQE compression to Admin default */ + mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + /* Disable CQE compression */ + netdev_warn(priv->netdev, "Disabling cqe compression"); + err = mlx5e_modify_rx_cqe_compression_locked(priv, false); + if (err) { + netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); + mutex_unlock(&priv->state_lock); + return err; + } + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + mutex_unlock(&priv->state_lock); + return -ERANGE; + } + + memcpy(&priv->tstamp, &config, sizeof(config)); + mutex_unlock(&priv->state_lock); + + /* might need to fix some features */ + netdev_update_features(priv->netdev); + + return copy_to_user(ifr->ifr_data, &config, + sizeof(config)) ? -EFAULT : 0; +} + +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) +{ + struct hwtstamp_config *cfg = &priv->tstamp; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return -EOPNOTSUPP; + + return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0; +} + +static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlx5e_hwstamp_set(priv, ifr); + case SIOCGHWTSTAMP: + return mlx5e_hwstamp_get(priv, ifr); + default: + return -EOPNOTSUPP; + } +} + +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); +} + +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, + vlan, qos); +} + +static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting); +} + +static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting); +} + +static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1, + max_tx_rate, min_tx_rate); +} + +static int mlx5_vport_link2ifla(u8 esw_link) +{ + switch (esw_link) { + case MLX5_VPORT_ADMIN_STATE_DOWN: + return IFLA_VF_LINK_STATE_DISABLE; + case MLX5_VPORT_ADMIN_STATE_UP: + return IFLA_VF_LINK_STATE_ENABLE; + } + return IFLA_VF_LINK_STATE_AUTO; +} + +static int mlx5_ifla_link2vport(u8 ifla_link) +{ + switch (ifla_link) { + case IFLA_VF_LINK_STATE_DISABLE: + return MLX5_VPORT_ADMIN_STATE_DOWN; + case IFLA_VF_LINK_STATE_ENABLE: + return MLX5_VPORT_ADMIN_STATE_UP; + } + return MLX5_VPORT_ADMIN_STATE_AUTO; +} + +static int mlx5e_set_vf_link_state(struct net_device *dev, int vf, + int link_state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, + mlx5_ifla_link2vport(link_state)); +} + +static int mlx5e_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); + if (err) + return err; + ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate); + return 0; +} + +static int mlx5e_get_vf_stats(struct net_device *dev, + int vf, struct ifla_vf_stats *vf_stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, + vf_stats); +} +#endif + +struct mlx5e_vxlan_work { + struct work_struct work; + struct mlx5e_priv *priv; + u16 port; +}; + +static void mlx5e_vxlan_add_work(struct work_struct *work) +{ + struct mlx5e_vxlan_work *vxlan_work = + container_of(work, struct mlx5e_vxlan_work, work); + struct mlx5e_priv *priv = vxlan_work->priv; + u16 port = vxlan_work->port; + + mutex_lock(&priv->state_lock); + mlx5_vxlan_add_port(priv->mdev->vxlan, port); + mutex_unlock(&priv->state_lock); + + kfree(vxlan_work); +} + +static void mlx5e_vxlan_del_work(struct work_struct *work) +{ + struct mlx5e_vxlan_work *vxlan_work = + container_of(work, struct mlx5e_vxlan_work, work); + struct mlx5e_priv *priv = vxlan_work->priv; + u16 port = vxlan_work->port; + + mutex_lock(&priv->state_lock); + mlx5_vxlan_del_port(priv->mdev->vxlan, port); + mutex_unlock(&priv->state_lock); + kfree(vxlan_work); +} + +static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add) +{ + struct mlx5e_vxlan_work *vxlan_work; + + vxlan_work = kmalloc(sizeof(*vxlan_work), GFP_ATOMIC); + if (!vxlan_work) + return; + + if (add) + INIT_WORK(&vxlan_work->work, mlx5e_vxlan_add_work); + else + INIT_WORK(&vxlan_work->work, mlx5e_vxlan_del_work); + + vxlan_work->priv = priv; + vxlan_work->port = port; + queue_work(priv->wq, &vxlan_work->work); +} + +static void mlx5e_add_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + return; + + if (!mlx5_vxlan_allowed(priv->mdev->vxlan)) + return; + + mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1); +} + +static void mlx5e_del_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + return; + + if (!mlx5_vxlan_allowed(priv->mdev->vxlan)) + return; + + mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0); +} + +static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, + struct sk_buff *skb, + netdev_features_t features) +{ + unsigned int offset = 0; + struct udphdr *udph; + u8 proto; + u16 port; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + proto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); + break; + default: + goto out; + } + + switch (proto) { + case IPPROTO_GRE: + return features; + case IPPROTO_UDP: + udph = udp_hdr(skb); + port = be16_to_cpu(udph->dest); + + /* Verify if UDP port is being offloaded by HW */ + if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port)) + return features; + } + +out: + /* Disable CSUM and GSO if the udp dport is not offloaded by HW */ + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + +static netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + features = vlan_features_check(skb, features); + features = vxlan_features_check(skb, features); + +#ifdef CONFIG_MLX5_EN_IPSEC + if (mlx5e_ipsec_feature_check(skb, netdev, features)) + return features; +#endif + + /* Validate if the tunneled packet is being offloaded by HW */ + if (skb->encapsulation && + (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK)) + return mlx5e_tunnel_features_check(priv, skb, features); + + return features; +} + +static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, + struct mlx5e_txqsq *sq) +{ + struct mlx5_eq *eq = sq->cq.mcq.eq; + u32 eqe_count; + + netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eq->eqn, eq->cons_index, eq->irqn); + + eqe_count = mlx5_eq_poll_irq_disabled(eq); + if (!eqe_count) + return false; + + netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn); + sq->channel->stats->eq_rearm++; + return true; +} + +static void mlx5e_tx_timeout_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + tx_timeout_work); + struct net_device *dev = priv->netdev; + bool reopen_channels = false; + int i, err; + + rtnl_lock(); + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); + struct mlx5e_txqsq *sq = priv->txq2sq[i]; + + if (!netif_xmit_stopped(dev_queue)) + continue; + + netdev_err(dev, + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", + i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - dev_queue->trans_start)); + + /* If we recover a lost interrupt, most likely TX timeout will + * be resolved, skip reopening channels + */ + if (!mlx5e_tx_timeout_eq_recover(dev, sq)) { + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + reopen_channels = true; + } + } + + if (!reopen_channels) + goto unlock; + + mlx5e_close_locked(dev); + err = mlx5e_open_locked(dev); + if (err) + netdev_err(priv->netdev, + "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + err); + +unlock: + mutex_unlock(&priv->state_lock); + rtnl_unlock(); +} + +static void mlx5e_tx_timeout(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + netdev_err(dev, "TX timeout detected\n"); + queue_work(priv->wq, &priv->tx_timeout_work); +} + +static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) +{ + struct net_device *netdev = priv->netdev; + struct mlx5e_channels new_channels = {}; + + if (priv->channels.params.lro_en) { + netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n"); + return -EINVAL; + } + + if (MLX5_IPSEC_DEV(priv->mdev)) { + netdev_warn(netdev, "can't set XDP with IPSec offload\n"); + return -EINVAL; + } + + new_channels.params = priv->channels.params; + new_channels.params.xdp_prog = prog; + + if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { + netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", + new_channels.params.sw_mtu, + mlx5e_xdp_max_mtu(&new_channels.params)); + return -EINVAL; + } + + return 0; +} + +static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct bpf_prog *old_prog; + bool reset, was_opened; + int err = 0; + int i; + + mutex_lock(&priv->state_lock); + + if (prog) { + err = mlx5e_xdp_allowed(priv, prog); + if (err) + goto unlock; + } + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + /* no need for full reset when exchanging programs */ + reset = (!priv->channels.params.xdp_prog || !prog); + + if (was_opened && reset) + mlx5e_close_locked(netdev); + if (was_opened && !reset) { + /* num_channels is invariant here, so we can take the + * batched reference right upfront. + */ + prog = bpf_prog_add(prog, priv->channels.num); + if (IS_ERR(prog)) { + err = PTR_ERR(prog); + goto unlock; + } + } + + /* exchange programs, extra prog reference we got from caller + * as long as we don't fail from this point onwards. + */ + old_prog = xchg(&priv->channels.params.xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + if (reset) /* change RQ type according to priv->xdp_prog */ + mlx5e_set_rq_type(priv->mdev, &priv->channels.params); + + if (was_opened && reset) + mlx5e_open_locked(netdev); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + goto unlock; + + /* exchanging programs w/o reset, we update ref counts on behalf + * of the channels RQs here. + */ + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); + napi_synchronize(&c->napi); + /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ + + old_prog = xchg(&c->rq.xdp_prog, prog); + + set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); + /* napi_schedule in case we have missed anything */ + napi_schedule(&c->napi); + + if (old_prog) + bpf_prog_put(old_prog); + } + +unlock: + mutex_unlock(&priv->state_lock); + + /* Need to fix some features. */ + if (!err) + netdev_update_features(netdev); + + return err; +} + +static u32 mlx5e_xdp_query(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + const struct bpf_prog *xdp_prog; + u32 prog_id = 0; + + mutex_lock(&priv->state_lock); + xdp_prog = priv->channels.params.xdp_prog; + if (xdp_prog) + prog_id = xdp_prog->aux->id; + mutex_unlock(&priv->state_lock); + + return prog_id; +} + +static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx5e_xdp_set(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_id = mlx5e_xdp_query(dev); + return 0; + default: + return -EINVAL; + } +} + +const struct net_device_ops mlx5e_netdev_ops = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_setup_tc = mlx5e_setup_tc, + .ndo_select_queue = mlx5e_select_queue, + .ndo_get_stats64 = mlx5e_get_stats, + .ndo_set_rx_mode = mlx5e_set_rx_mode, + .ndo_set_mac_address = mlx5e_set_mac, + .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, + .ndo_set_features = mlx5e_set_features, + .ndo_fix_features = mlx5e_fix_features, + .ndo_change_mtu = mlx5e_change_nic_mtu, + .ndo_do_ioctl = mlx5e_ioctl, + .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, + .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, + .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, + .ndo_features_check = mlx5e_features_check, + .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_bpf = mlx5e_xdp, + .ndo_xdp_xmit = mlx5e_xdp_xmit, +#ifdef CONFIG_MLX5_EN_ARFS + .ndo_rx_flow_steer = mlx5e_rx_flow_steer, +#endif +#ifdef CONFIG_MLX5_ESWITCH + /* SRIOV E-Switch NDOs */ + .ndo_set_vf_mac = mlx5e_set_vf_mac, + .ndo_set_vf_vlan = mlx5e_set_vf_vlan, + .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk, + .ndo_set_vf_trust = mlx5e_set_vf_trust, + .ndo_set_vf_rate = mlx5e_set_vf_rate, + .ndo_get_vf_config = mlx5e_get_vf_config, + .ndo_set_vf_link_state = mlx5e_set_vf_link_state, + .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_has_offload_stats = mlx5e_has_offload_stats, + .ndo_get_offload_stats = mlx5e_get_offload_stats, +#endif +}; + +static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) +{ + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -EOPNOTSUPP; + if (!MLX5_CAP_GEN(mdev, eth_net_offloads) || + !MLX5_CAP_GEN(mdev, nic_flow_table) || + !MLX5_CAP_ETH(mdev, csum_cap) || + !MLX5_CAP_ETH(mdev, max_lso_cap) || + !MLX5_CAP_ETH(mdev, vlan_cap) || + !MLX5_CAP_ETH(mdev, rss_ind_tbl_cap) || + MLX5_CAP_FLOWTABLE(mdev, + flow_table_properties_nic_receive.max_ft_level) + < 3) { + mlx5_core_warn(mdev, + "Not creating net device, some required device capabilities are missing\n"); + return -EOPNOTSUPP; + } + if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable)) + mlx5_core_warn(mdev, "Self loop back prevention is not supported\n"); + if (!MLX5_CAP_GEN(mdev, cq_moderation)) + mlx5_core_warn(mdev, "CQ moderation is not supported\n"); + + return 0; +} + +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, + int num_channels) +{ + int i; + + for (i = 0; i < len; i++) + indirection_rqt[i] = i % num_channels; +} + +static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) +{ + u32 link_speed = 0; + u32 pci_bw = 0; + + mlx5e_port_max_linkspeed(mdev, &link_speed); + pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL); + mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); + +#define MLX5E_SLOW_PCI_RATIO (2) + + return link_speed && pci_bw && + link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; +} + +static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) +{ + struct net_dim_cq_moder moder; + + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} + +static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +{ + struct net_dim_cq_moder moder; + + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} + +static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) +{ + return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE : + NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + if (params->tx_dim_enabled) { + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode); + } else { + params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); + } + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, + params->tx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +} + +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + if (params->rx_dim_enabled) { + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode); + } else { + params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); + } + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, + params->rx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +} + +static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) +{ + int i; + + /* The supported periods are organized in ascending order */ + for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) + if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) + break; + + return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); +} + +void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 max_channels, u16 mtu) +{ + u8 rx_cq_period_mode; + + params->sw_mtu = mtu; + params->hard_mtu = MLX5E_ETH_HARD_MTU; + params->num_channels = max_channels; + params->num_tc = 1; + + /* SQ */ + params->log_sq_size = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + + /* set CQE compression */ + params->rx_cqe_compress_def = false; + if (MLX5_CAP_GEN(mdev, cqe_compression) && + MLX5_CAP_GEN(mdev, vport_group_manager)) + params->rx_cqe_compress_def = slow_pci_heuristic(mdev); + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false); + + /* RQ */ + /* Prefer Striding RQ, unless any of the following holds: + * - Striding RQ configuration is not possible/supported. + * - Slow PCI heuristic. + * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. + */ + if (!slow_pci_heuristic(mdev) && + mlx5e_striding_rq_possible(mdev, params) && + (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) || + !mlx5e_rx_is_linear_skb(mdev, params))) + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); + mlx5e_set_rq_type(mdev, params); + mlx5e_init_rq_type_params(mdev, params); + + /* HW LRO */ + + /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) + params->lro_en = !slow_pci_heuristic(mdev); + params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + + /* CQ moderation params */ + rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode); + mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); + + /* TX inline */ + params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); + + /* RSS */ + params->rss_hfunc = ETH_RSS_HASH_XOR; + netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); + mlx5e_build_default_indir_rqt(params->indirection_rqt, + MLX5E_INDIR_RQT_SIZE, max_channels); +} + +static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + priv->mdev = mdev; + priv->netdev = netdev; + priv->profile = profile; + priv->ppriv = ppriv; + priv->msglevel = MLX5E_MSG_LEVEL; + priv->max_opened_tc = 1; + + mlx5e_build_nic_params(mdev, &priv->channels.params, + profile->max_nch(mdev), netdev->mtu); + + mutex_init(&priv->state_lock); + + INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); + INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); + INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); + INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); + + mlx5e_timestamp_init(priv); +} + +static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr); + if (is_zero_ether_addr(netdev->dev_addr) && + !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) { + eth_hw_addr_random(netdev); + mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr); + } +} + +#if IS_ENABLED(CONFIG_MLX5_ESWITCH) +static const struct switchdev_ops mlx5e_switchdev_ops = { + .switchdev_port_attr_get = mlx5e_attr_get, +}; +#endif + +static void mlx5e_build_nic_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + bool fcs_supported; + bool fcs_enabled; + + SET_NETDEV_DEV(netdev, &mdev->pdev->dev); + + netdev->netdev_ops = &mlx5e_netdev_ops; + +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; +#endif + + netdev->watchdog_timeo = 15 * HZ; + + netdev->ethtool_ops = &mlx5e_ethtool_ops; + + netdev->vlan_features |= NETIF_F_SG; + netdev->vlan_features |= NETIF_F_IP_CSUM; + netdev->vlan_features |= NETIF_F_IPV6_CSUM; + netdev->vlan_features |= NETIF_F_GRO; + netdev->vlan_features |= NETIF_F_TSO; + netdev->vlan_features |= NETIF_F_TSO6; + netdev->vlan_features |= NETIF_F_RXCSUM; + netdev->vlan_features |= NETIF_F_RXHASH; + + netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX; + + if (!!MLX5_CAP_ETH(mdev, lro_cap) && + mlx5e_check_fragmented_striding_rq_cap(mdev)) + netdev->vlan_features |= NETIF_F_LRO; + + netdev->hw_features = netdev->vlan_features; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; + + if (mlx5_vxlan_allowed(mdev->vxlan) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_enc_features |= NETIF_F_IP_CSUM; + netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; + netdev->hw_enc_features |= NETIF_F_TSO; + netdev->hw_enc_features |= NETIF_F_TSO6; + netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; + } + + if (mlx5_vxlan_allowed(mdev->vxlan)) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; + } + + if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->gso_partial_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + } + + netdev->hw_features |= NETIF_F_GSO_PARTIAL; + netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4; + netdev->hw_features |= NETIF_F_GSO_UDP_L4; + netdev->features |= NETIF_F_GSO_UDP_L4; + + mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); + + if (fcs_supported) + netdev->hw_features |= NETIF_F_RXALL; + + if (MLX5_CAP_ETH(mdev, scatter_fcs)) + netdev->hw_features |= NETIF_F_RXFCS; + + netdev->features = netdev->hw_features; + if (!priv->channels.params.lro_en) + netdev->features &= ~NETIF_F_LRO; + + if (fcs_enabled) + netdev->features &= ~NETIF_F_RXALL; + + if (!priv->channels.params.scatter_fcs_en) + netdev->features &= ~NETIF_F_RXFCS; + + /* prefere CQE compression over rxhash */ + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) + netdev->features &= ~NETIF_F_RXHASH; + +#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) + if (FT_CAP(flow_modify_en) && + FT_CAP(modify_root) && + FT_CAP(identified_miss_table_mode) && + FT_CAP(flow_table_modify)) { +#ifdef CONFIG_MLX5_ESWITCH + netdev->hw_features |= NETIF_F_HW_TC; +#endif +#ifdef CONFIG_MLX5_EN_ARFS + netdev->hw_features |= NETIF_F_NTUPLE; +#endif + } + + netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; + + netdev->priv_flags |= IFF_UNICAST_FLT; + + mlx5e_set_netdev_dev_addr(netdev); + +#if IS_ENABLED(CONFIG_MLX5_ESWITCH) + if (MLX5_ESWITCH_MANAGER(mdev)) + netdev->switchdev_ops = &mlx5e_switchdev_ops; +#endif + + mlx5e_ipsec_build_netdev(priv); + mlx5e_tls_build_netdev(priv); +} + +static void mlx5e_create_q_counters(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_core_alloc_q_counter(mdev, &priv->q_counter); + if (err) { + mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err); + priv->q_counter = 0; + } + + err = mlx5_core_alloc_q_counter(mdev, &priv->drop_rq_q_counter); + if (err) { + mlx5_core_warn(mdev, "alloc drop RQ counter failed, %d\n", err); + priv->drop_rq_q_counter = 0; + } +} + +static void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) +{ + if (priv->q_counter) + mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter); + + if (priv->drop_rq_q_counter) + mlx5_core_dealloc_q_counter(priv->mdev, priv->drop_rq_q_counter); +} + +static void mlx5e_nic_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv); + err = mlx5e_ipsec_init(priv); + if (err) + mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err); + err = mlx5e_tls_init(priv); + if (err) + mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); + mlx5e_build_nic_netdev(netdev); + mlx5e_build_tc2txq_maps(priv); +} + +static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) +{ + mlx5e_tls_cleanup(priv); + mlx5e_ipsec_cleanup(priv); +} + +static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5e_create_indirect_rqt(priv); + if (err) + return err; + + err = mlx5e_create_direct_rqts(priv); + if (err) + goto err_destroy_indirect_rqts; + + err = mlx5e_create_indirect_tirs(priv); + if (err) + goto err_destroy_direct_rqts; + + err = mlx5e_create_direct_tirs(priv); + if (err) + goto err_destroy_indirect_tirs; + + err = mlx5e_create_flow_steering(priv); + if (err) { + mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); + goto err_destroy_direct_tirs; + } + + err = mlx5e_tc_nic_init(priv); + if (err) + goto err_destroy_flow_steering; + + return 0; + +err_destroy_flow_steering: + mlx5e_destroy_flow_steering(priv); +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); +err_destroy_indirect_tirs: + mlx5e_destroy_indirect_tirs(priv); +err_destroy_direct_rqts: + mlx5e_destroy_direct_rqts(priv); +err_destroy_indirect_rqts: + mlx5e_destroy_rqt(priv, &priv->indir_rqt); + return err; +} + +static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) +{ + mlx5e_tc_nic_cleanup(priv); + mlx5e_destroy_flow_steering(priv); + mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_indirect_tirs(priv); + mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); +} + +static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_tises(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; + } + +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_initialize(priv); +#endif + return 0; +} + +static void mlx5e_nic_enable(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 max_mtu; + + mlx5e_init_l2_addr(priv); + + /* Marking the link as currently not needed by the Driver */ + if (!netif_running(netdev)) + mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); + + /* MTU range: 68 - hw-specific max */ + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); + mlx5e_set_dev_port_mtu(priv); + + mlx5_lag_add(mdev, netdev); + + mlx5e_enable_async_events(priv); + + if (MLX5_ESWITCH_MANAGER(priv->mdev)) + mlx5e_register_vport_reps(priv); + + if (netdev->reg_state != NETREG_REGISTERED) + return; +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_init_app(priv); +#endif + + queue_work(priv->wq, &priv->set_rx_mode_work); + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + netif_device_attach(netdev); + rtnl_unlock(); +} + +static void mlx5e_nic_disable(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (priv->netdev->reg_state == NETREG_REGISTERED) + mlx5e_dcbnl_delete_app(priv); +#endif + + rtnl_lock(); + if (netif_running(priv->netdev)) + mlx5e_close(priv->netdev); + netif_device_detach(priv->netdev); + rtnl_unlock(); + + queue_work(priv->wq, &priv->set_rx_mode_work); + + if (MLX5_ESWITCH_MANAGER(priv->mdev)) + mlx5e_unregister_vport_reps(priv); + + mlx5e_disable_async_events(priv); + mlx5_lag_remove(mdev); +} + +static const struct mlx5e_profile mlx5e_nic_profile = { + .init = mlx5e_nic_init, + .cleanup = mlx5e_nic_cleanup, + .init_rx = mlx5e_init_nic_rx, + .cleanup_rx = mlx5e_cleanup_nic_rx, + .init_tx = mlx5e_init_nic_tx, + .cleanup_tx = mlx5e_cleanup_nic_tx, + .enable = mlx5e_nic_enable, + .disable = mlx5e_nic_disable, + .update_stats = mlx5e_update_ndo_stats, + .max_nch = mlx5e_get_max_num_channels, + .update_carrier = mlx5e_update_carrier, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .max_tc = MLX5E_MAX_NUM_TC, +}; + +/* mlx5e generic netdev management API (move to en_common.c) */ + +struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + int nch = profile->max_nch(mdev); + struct net_device *netdev; + struct mlx5e_priv *priv; + + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), + nch * profile->max_tc, + nch); + if (!netdev) { + mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); + return NULL; + } + +#ifdef CONFIG_MLX5_EN_ARFS + netdev->rx_cpu_rmap = mdev->rmap; +#endif + + profile->init(mdev, netdev, profile, ppriv); + + netif_carrier_off(netdev); + + priv = netdev_priv(netdev); + + priv->wq = create_singlethread_workqueue("mlx5e"); + if (!priv->wq) + goto err_cleanup_nic; + + return netdev; + +err_cleanup_nic: + if (profile->cleanup) + profile->cleanup(priv); + free_netdev(netdev); + + return NULL; +} + +int mlx5e_attach_netdev(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + const struct mlx5e_profile *profile; + int max_nch; + int err; + + profile = priv->profile; + clear_bit(MLX5E_STATE_DESTROYING, &priv->state); + + /* max number of channels may have changed */ + max_nch = mlx5e_get_max_num_channels(priv->mdev); + if (priv->channels.params.num_channels > max_nch) { + mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); + priv->channels.params.num_channels = max_nch; + mlx5e_build_default_indir_rqt(priv->channels.params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, max_nch); + } + + err = profile->init_tx(priv); + if (err) + goto out; + + mlx5e_create_q_counters(priv); + + err = mlx5e_open_drop_rq(priv, &priv->drop_rq); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_destroy_q_counters; + } + + err = profile->init_rx(priv); + if (err) + goto err_close_drop_rq; + + if (profile->enable) + profile->enable(priv); + + return 0; + +err_close_drop_rq: + mlx5e_close_drop_rq(&priv->drop_rq); + +err_destroy_q_counters: + mlx5e_destroy_q_counters(priv); + profile->cleanup_tx(priv); + +out: + return err; +} + +void mlx5e_detach_netdev(struct mlx5e_priv *priv) +{ + const struct mlx5e_profile *profile = priv->profile; + + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + + if (profile->disable) + profile->disable(priv); + flush_workqueue(priv->wq); + + profile->cleanup_rx(priv); + mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_destroy_q_counters(priv); + profile->cleanup_tx(priv); + cancel_delayed_work_sync(&priv->update_stats_work); +} + +void mlx5e_destroy_netdev(struct mlx5e_priv *priv) +{ + const struct mlx5e_profile *profile = priv->profile; + struct net_device *netdev = priv->netdev; + + destroy_workqueue(priv->wq); + if (profile->cleanup) + profile->cleanup(priv); + free_netdev(netdev); +} + +/* mlx5e_attach and mlx5e_detach scope should be only creating/destroying + * hardware contexts and to connect it to the current netdev. + */ +static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + int err; + + if (netif_device_present(netdev)) + return 0; + + err = mlx5e_create_mdev_resources(mdev); + if (err) + return err; + + err = mlx5e_attach_netdev(priv); + if (err) { + mlx5e_destroy_mdev_resources(mdev); + return err; + } + + return 0; +} + +static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + + if (!netif_device_present(netdev)) + return; + + mlx5e_detach_netdev(priv); + mlx5e_destroy_mdev_resources(mdev); +} + +static void *mlx5e_add(struct mlx5_core_dev *mdev) +{ + struct net_device *netdev; + void *rpriv = NULL; + void *priv; + int err; + + err = mlx5e_check_required_hca_cap(mdev); + if (err) + return NULL; + +#ifdef CONFIG_MLX5_ESWITCH + if (MLX5_ESWITCH_MANAGER(mdev)) { + rpriv = mlx5e_alloc_nic_rep_priv(mdev); + if (!rpriv) { + mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n"); + return NULL; + } + } +#endif + + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv); + if (!netdev) { + mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); + goto err_free_rpriv; + } + + priv = netdev_priv(netdev); + + err = mlx5e_attach(mdev, priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err); + goto err_destroy_netdev; + } + + err = register_netdev(netdev); + if (err) { + mlx5_core_err(mdev, "register_netdev failed, %d\n", err); + goto err_detach; + } + +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_init_app(priv); +#endif + return priv; + +err_detach: + mlx5e_detach(mdev, priv); +err_destroy_netdev: + mlx5e_destroy_netdev(priv); +err_free_rpriv: + kfree(rpriv); + return NULL; +} + +static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + void *ppriv = priv->ppriv; + +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_delete_app(priv); +#endif + unregister_netdev(priv->netdev); + mlx5e_detach(mdev, vpriv); + mlx5e_destroy_netdev(priv); + kfree(ppriv); +} + +static void *mlx5e_get_netdev(void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + + return priv->netdev; +} + +static struct mlx5_interface mlx5e_interface = { + .add = mlx5e_add, + .remove = mlx5e_remove, + .attach = mlx5e_attach, + .detach = mlx5e_detach, + .event = mlx5e_async_event, + .protocol = MLX5_INTERFACE_PROTOCOL_ETH, + .get_dev = mlx5e_get_netdev, +}; + +void mlx5e_init(void) +{ + mlx5e_ipsec_build_inverse_table(); + mlx5e_build_ptys2ethtool_map(); + mlx5_register_interface(&mlx5e_interface); +} + +void mlx5e_cleanup(void) +{ + mlx5_unregister_interface(&mlx5e_interface); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c new file mode 100644 index 000000000..1ab40d622 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -0,0 +1,1295 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <generated/utsrelease.h> +#include <linux/mlx5/fs.h> +#include <net/switchdev.h> +#include <net/pkt_cls.h> +#include <net/act_api.h> +#include <net/netevent.h> +#include <net/arp.h> + +#include "eswitch.h" +#include "en.h" +#include "en_rep.h" +#include "en_tc.h" +#include "fs_core.h" + +#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \ + max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) +#define MLX5E_REP_PARAMS_LOG_RQ_SIZE \ + max(0x6, MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE) + +static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; + +static void mlx5e_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->driver, mlx5e_rep_driver_name, + sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version)); +} + +static const struct counter_desc sw_rep_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, +}; + +struct vport_stats { + u64 vport_rx_packets; + u64 vport_tx_packets; + u64 vport_rx_bytes; + u64 vport_tx_bytes; +}; + +static const struct counter_desc vport_rep_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_packets) }, + { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_bytes) }, + { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_packets) }, + { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_bytes) }, +}; + +#define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) +#define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc) + +static void mlx5e_rep_get_strings(struct net_device *dev, + u32 stringset, uint8_t *data) +{ + int i, j; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) + strcpy(data + (i * ETH_GSTRING_LEN), + sw_rep_stats_desc[i].format); + for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++) + strcpy(data + (i * ETH_GSTRING_LEN), + vport_rep_stats_desc[j].format); + break; + } +} + +static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct rtnl_link_stats64 *vport_stats; + struct ifla_vf_stats vf_stats; + int err; + + err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats); + if (err) { + pr_warn("vport %d error %d reading stats\n", rep->vport, err); + return; + } + + vport_stats = &priv->stats.vf_vport; + /* flip tx/rx as we are reporting the counters for the switch vport */ + vport_stats->rx_packets = vf_stats.tx_packets; + vport_stats->rx_bytes = vf_stats.tx_bytes; + vport_stats->tx_packets = vf_stats.rx_packets; + vport_stats->tx_bytes = vf_stats.rx_bytes; +} + +static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_sq_stats *sq_stats; + int i, j; + + memset(s, 0, sizeof(*s)); + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + rq_stats = c->rq.stats; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + + for (j = 0; j < priv->channels.params.num_tc; j++) { + sq_stats = c->sq[j].stats; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_queue_dropped += sq_stats->dropped; + } + } +} + +static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int i, j; + + if (!data) + return; + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_rep_update_sw_counters(priv); + mlx5e_rep_update_hw_counters(priv); + mutex_unlock(&priv->state_lock); + + for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) + data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_rep_stats_desc, i); + + for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++) + data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport, + vport_rep_stats_desc, j); +} + +static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return NUM_VPORT_REP_SW_COUNTERS + NUM_VPORT_REP_HW_COUNTERS; + default: + return -EOPNOTSUPP; + } +} + +static const struct ethtool_ops mlx5e_rep_ethtool_ops = { + .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_rep_get_strings, + .get_sset_count = mlx5e_rep_get_sset_count, + .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, +}; + +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (esw->mode != SRIOV_OFFLOADS) + return -EOPNOTSUPP; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + attr->u.ppid.id_len = ETH_ALEN; + ether_addr_copy(attr->u.ppid.id, rep->hw_id); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_sq *rep_sq, *tmp; + struct mlx5e_rep_priv *rpriv; + + if (esw->mode != SRIOV_OFFLOADS) + return; + + rpriv = mlx5e_rep_to_rep_priv(rep); + list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) { + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); + list_del(&rep_sq->list); + kfree(rep_sq); + } +} + +static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + u32 *sqns_array, int sqns_num) +{ + struct mlx5_flow_handle *flow_rule; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_sq *rep_sq; + int err; + int i; + + if (esw->mode != SRIOV_OFFLOADS) + return 0; + + rpriv = mlx5e_rep_to_rep_priv(rep); + for (i = 0; i < sqns_num; i++) { + rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL); + if (!rep_sq) { + err = -ENOMEM; + goto out_err; + } + + /* Add re-inject rule to the PF/representor sqs */ + flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, + rep->vport, + sqns_array[i]); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + kfree(rep_sq); + goto out_err; + } + rep_sq->send_to_vport_rule = flow_rule; + list_add(&rep_sq->list, &rpriv->vport_sqs_list); + } + return 0; + +out_err: + mlx5e_sqs2vport_stop(esw, rep); + return err; +} + +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5e_channel *c; + int n, tc, num_sqs = 0; + int err = -ENOMEM; + u32 *sqs; + + sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(*sqs), GFP_KERNEL); + if (!sqs) + goto out; + + for (n = 0; n < priv->channels.num; n++) { + c = priv->channels.c[n]; + for (tc = 0; tc < c->num_tc; tc++) + sqs[num_sqs++] = c->sq[tc].sqn; + } + + err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs); + kfree(sqs); + +out: + if (err) + netdev_warn(priv->netdev, "Failed to add SQs FWD rules %d\n", err); + return err; +} + +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + mlx5e_sqs2vport_stop(esw, rep); +} + +static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) +{ +#if IS_ENABLED(CONFIG_IPV6) + unsigned long ipv6_interval = NEIGH_VAR(&nd_tbl.parms, + DELAY_PROBE_TIME); +#else + unsigned long ipv6_interval = ~0UL; +#endif + unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, + DELAY_PROBE_TIME); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + + rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); + mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); +} + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + + mlx5_fc_queue_stats_work(priv->mdev, + &neigh_update->neigh_stats_work, + neigh_update->min_interval); +} + +static void mlx5e_rep_neigh_stats_work(struct work_struct *work) +{ + struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, + neigh_update.neigh_stats_work.work); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe; + + rtnl_lock(); + if (!list_empty(&rpriv->neigh_update.neigh_list)) + mlx5e_rep_queue_neigh_stats_work(priv); + + list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list) + mlx5e_tc_update_neigh_used_value(nhe); + + rtnl_unlock(); +} + +static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) +{ + refcount_inc(&nhe->refcnt); +} + +static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) +{ + if (refcount_dec_and_test(&nhe->refcnt)) + kfree(nhe); +} + +static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + bool neigh_connected, + unsigned char ha[ETH_ALEN]) +{ + struct ethhdr *eth = (struct ethhdr *)e->encap_header; + + ASSERT_RTNL(); + + if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) || + !ether_addr_equal(e->h_dest, ha)) + mlx5e_tc_encap_flows_del(priv, e); + + if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { + ether_addr_copy(e->h_dest, ha); + ether_addr_copy(eth->h_dest, ha); + + mlx5e_tc_encap_flows_add(priv, e); + } +} + +static void mlx5e_rep_neigh_update(struct work_struct *work) +{ + struct mlx5e_neigh_hash_entry *nhe = + container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work); + struct neighbour *n = nhe->n; + struct mlx5e_encap_entry *e; + unsigned char ha[ETH_ALEN]; + struct mlx5e_priv *priv; + bool neigh_connected; + bool encap_connected; + u8 nud_state, dead; + + rtnl_lock(); + + /* If these parameters are changed after we release the lock, + * we'll receive another event letting us know about it. + * We use this lock to avoid inconsistency between the neigh validity + * and it's hw address. + */ + read_lock_bh(&n->lock); + memcpy(ha, n->ha, ETH_ALEN); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + + neigh_connected = (nud_state & NUD_VALID) && !dead; + + list_for_each_entry(e, &nhe->encap_list, encap_list) { + encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); + priv = netdev_priv(e->out_dev); + + if (encap_connected != neigh_connected || + !ether_addr_equal(e->h_dest, ha)) + mlx5e_rep_update_flows(priv, e, neigh_connected, ha); + } + mlx5e_rep_neigh_entry_release(nhe); + rtnl_unlock(); + neigh_release(n); +} + +static struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh); + +static int mlx5e_rep_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + neigh_update.netevent_nb); + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + struct mlx5e_neigh m_neigh = {}; + struct neigh_parms *p; + struct neighbour *n; + bool found = false; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + n = ptr; +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != &nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + m_neigh.dev = n->dev; + m_neigh.family = n->ops->family; + memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + + /* We are in atomic context and can't take RTNL mutex, so use + * spin_lock_bh to lookup the neigh table. bh is used since + * netevent can be called from a softirq context. + */ + spin_lock_bh(&neigh_update->encap_lock); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); + if (!nhe) { + spin_unlock_bh(&neigh_update->encap_lock); + return NOTIFY_DONE; + } + + /* This assignment is valid as long as the the neigh reference + * is taken + */ + nhe->n = n; + + /* Take a reference to ensure the neighbour and mlx5 encap + * entry won't be destructed until we drop the reference in + * delayed work. + */ + neigh_hold(n); + mlx5e_rep_neigh_entry_hold(nhe); + + if (!queue_work(priv->wq, &nhe->neigh_update_work)) { + mlx5e_rep_neigh_entry_release(nhe); + neigh_release(n); + } + spin_unlock_bh(&neigh_update->encap_lock); + break; + + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We check the device is present since we don't care about + * changes in the default table, we only care about changes + * done per device delay prob time parameter. + */ +#if IS_ENABLED(CONFIG_IPV6) + if (!p->dev || (p->tbl != &nd_tbl && p->tbl != &arp_tbl)) +#else + if (!p->dev || p->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + /* We are in atomic context and can't take RTNL mutex, + * so use spin_lock_bh to walk the neigh list and look for + * the relevant device. bh is used since netevent can be + * called from a softirq context. + */ + spin_lock_bh(&neigh_update->encap_lock); + list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) { + if (p->dev == nhe->m_neigh.dev) { + found = true; + break; + } + } + spin_unlock_bh(&neigh_update->encap_lock); + if (!found) + return NOTIFY_DONE; + + neigh_update->min_interval = min_t(unsigned long, + NEIGH_VAR(p, DELAY_PROBE_TIME), + neigh_update->min_interval); + mlx5_fc_update_sampling_interval(priv->mdev, + neigh_update->min_interval); + break; + } + return NOTIFY_DONE; +} + +static const struct rhashtable_params mlx5e_neigh_ht_params = { + .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), + .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), + .key_len = sizeof(struct mlx5e_neigh), + .automatic_shrinking = true, +}; + +static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + int err; + + err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); + if (err) + return err; + + INIT_LIST_HEAD(&neigh_update->neigh_list); + spin_lock_init(&neigh_update->encap_lock); + INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, + mlx5e_rep_neigh_stats_work); + mlx5e_rep_neigh_update_init_interval(rpriv); + + rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event; + err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb); + if (err) + goto out_err; + return 0; + +out_err: + rhashtable_destroy(&neigh_update->neigh_ht); + return err; +} + +static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + unregister_netevent_notifier(&neigh_update->netevent_nb); + + flush_workqueue(priv->wq); /* flush neigh update works */ + + cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); + + rhashtable_destroy(&neigh_update->neigh_ht); +} + +static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int err; + + err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + if (err) + return err; + + list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); + + return err; +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + spin_lock_bh(&rpriv->neigh_update.encap_lock); + + list_del(&nhe->neigh_list); + + rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + spin_unlock_bh(&rpriv->neigh_update.encap_lock); +} + +/* This function must only be called under RTNL lock or under the + * representor's encap_lock in case RTNL mutex can't be held. + */ +static struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + + return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, + mlx5e_neigh_ht_params); +} + +static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh_hash_entry **nhe) +{ + int err; + + *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); + if (!*nhe) + return -ENOMEM; + + memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); + INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); + INIT_LIST_HEAD(&(*nhe)->encap_list); + refcount_set(&(*nhe)->refcnt, 1); + + err = mlx5e_rep_neigh_entry_insert(priv, *nhe); + if (err) + goto out_free; + return 0; + +out_free: + kfree(*nhe); + return err; +} + +static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + /* The neigh hash entry must be removed from the hash table regardless + * of the reference count value, so it won't be found by the next + * neigh notification call. The neigh hash entry reference count is + * incremented only during creation and neigh notification calls and + * protects from freeing the nhe struct. + */ + mlx5e_rep_neigh_entry_remove(priv, nhe); + mlx5e_rep_neigh_entry_release(nhe); +} + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_neigh_hash_entry *nhe; + int err; + + nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + if (!nhe) { + err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); + if (err) + return err; + } + list_add(&e->encap_list, &nhe->encap_list); + return 0; +} + +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_neigh_hash_entry *nhe; + + list_del(&e->encap_list); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + + if (list_empty(&nhe->encap_list)) + mlx5e_rep_neigh_entry_destroy(priv, nhe); +} + +static int mlx5e_rep_open(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_open_locked(dev); + if (err) + goto unlock; + + if (!mlx5_modify_vport_admin_state(priv->mdev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + rep->vport, MLX5_VPORT_ADMIN_STATE_UP)) + netif_carrier_on(dev); + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_rep_close(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + int ret; + + mutex_lock(&priv->state_lock); + mlx5_modify_vport_admin_state(priv->mdev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN); + ret = mlx5e_close_locked(dev); + mutex_unlock(&priv->state_lock); + return ret; +} + +static int mlx5e_rep_get_phys_port_name(struct net_device *dev, + char *buf, size_t len) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + int ret; + + ret = snprintf(buf, len, "%d", rep->vport - 1); + if (ret >= len) + return -EOPNOTSUPP; + + return 0; +} + +static int +mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *cls_flower, int flags) +{ + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, cls_flower, flags); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, cls_flower, flags); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, cls_flower, flags); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct mlx5e_priv *priv = cb_priv; + + if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct mlx5e_priv *priv = cb_priv; + + if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb, + priv, priv, f->extack); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return mlx5e_rep_setup_tc_block(dev, type_data); + default: + return -EOPNOTSUPP; + } +} + +bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; + + if (!MLX5_ESWITCH_MANAGER(priv->mdev)) + return false; + + rep = rpriv->rep; + if (esw->mode == SRIOV_OFFLOADS && + rep && rep->vport == FDB_UPLINK_VPORT) + return true; + + return false; +} + +static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; + + if (!MLX5_ESWITCH_MANAGER(priv->mdev)) + return false; + + rep = rpriv->rep; + if (rep && rep->vport != FDB_UPLINK_VPORT) + return true; + + return false; +} + +bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv)) + return true; + } + + return false; +} + +static int +mlx5e_get_sw_stats64(const struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_sw_stats *sstats = &priv->stats.sw; + + mlx5e_rep_update_sw_counters(priv); + + stats->rx_packets = sstats->rx_packets; + stats->rx_bytes = sstats->rx_bytes; + stats->tx_packets = sstats->tx_packets; + stats->tx_bytes = sstats->tx_bytes; + + stats->tx_dropped = sstats->tx_queue_dropped; + + return 0; +} + +int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return mlx5e_get_sw_stats64(dev, sp); + } + + return -EINVAL; +} + +static void +mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + /* update HW stats in background for next time */ + queue_delayed_work(priv->wq, &priv->update_stats_work, 0); + + memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); +} + +static const struct switchdev_ops mlx5e_rep_switchdev_ops = { + .switchdev_port_attr_get = mlx5e_attr_get, +}; + +static int mlx5e_change_rep_mtu(struct net_device *netdev, int new_mtu) +{ + return mlx5e_change_mtu(netdev, new_mtu, NULL); +} + +static const struct net_device_ops mlx5e_netdev_ops_rep = { + .ndo_open = mlx5e_rep_open, + .ndo_stop = mlx5e_rep_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, + .ndo_setup_tc = mlx5e_rep_setup_tc, + .ndo_get_stats64 = mlx5e_rep_get_stats, + .ndo_has_offload_stats = mlx5e_has_offload_stats, + .ndo_get_offload_stats = mlx5e_get_offload_stats, + .ndo_change_mtu = mlx5e_change_rep_mtu, +}; + +static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u16 mtu) +{ + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + + params->hard_mtu = MLX5E_ETH_HARD_MTU; + params->sw_mtu = mtu; + params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE; + params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; + params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE; + + params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(params, cq_period_mode); + + params->num_tc = 1; + params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); +} + +static void mlx5e_build_rep_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 max_mtu; + + netdev->netdev_ops = &mlx5e_netdev_ops_rep; + + netdev->watchdog_timeo = 15 * HZ; + + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; + + netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; + + netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; + netdev->hw_features |= NETIF_F_HW_TC; + + eth_hw_addr_random(netdev); + + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); +} + +static void mlx5e_init_rep(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + priv->mdev = mdev; + priv->netdev = netdev; + priv->profile = profile; + priv->ppriv = ppriv; + + mutex_init(&priv->state_lock); + + INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); + + priv->channels.params.num_channels = profile->max_nch(mdev); + + mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu); + mlx5e_build_rep_netdev(netdev); + + mlx5e_timestamp_init(priv); +} + +static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_flow_handle *flow_rule; + int err; + + mlx5e_init_l2_addr(priv); + + err = mlx5e_create_direct_rqts(priv); + if (err) + return err; + + err = mlx5e_create_direct_tirs(priv); + if (err) + goto err_destroy_direct_rqts; + + flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, + rep->vport, + priv->direct_tir[0].tirn); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto err_destroy_direct_tirs; + } + rpriv->vport_rx_rule = flow_rule; + + return 0; + +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); +err_destroy_direct_rqts: + mlx5e_destroy_direct_rqts(priv); + return err; +} + +static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + mlx5_del_flow_rules(rpriv->vport_rx_rule); + mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_rqts(priv); +} + +static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_tises(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; + } + return 0; +} + +static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev) +{ +#define MLX5E_PORT_REPRESENTOR_NCH 1 + return MLX5E_PORT_REPRESENTOR_NCH; +} + +static const struct mlx5e_profile mlx5e_rep_profile = { + .init = mlx5e_init_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_nic_tx, + .update_stats = mlx5e_rep_update_hw_counters, + .max_nch = mlx5e_get_rep_max_num_channels, + .update_carrier = NULL, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, + .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */, + .max_tc = 1, +}; + +/* e-Switch vport representors */ + +static int +mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + int err; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_add_sqs_fwd_rules(priv); + if (err) + return err; + } + + err = mlx5e_rep_neigh_init(rpriv); + if (err) + goto err_remove_sqs; + + /* init shared tc flow table */ + err = mlx5e_tc_esw_init(&rpriv->tc_ht); + if (err) + goto err_neigh_cleanup; + + return 0; + +err_neigh_cleanup: + mlx5e_rep_neigh_cleanup(rpriv); +err_remove_sqs: + mlx5e_remove_sqs_fwd_rules(priv); + return err; +} + +static void +mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_remove_sqs_fwd_rules(priv); + + /* clean uplink offloaded TC rules, delete shared tc flow table */ + mlx5e_tc_esw_cleanup(&rpriv->tc_ht); + + mlx5e_rep_neigh_cleanup(rpriv); +} + +static int +mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_rep_priv *rpriv; + struct net_device *netdev; + struct mlx5e_priv *upriv; + int err; + + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return -ENOMEM; + + netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, rpriv); + if (!netdev) { + pr_warn("Failed to create representor netdev for vport %d\n", + rep->vport); + kfree(rpriv); + return -EINVAL; + } + + rpriv->netdev = netdev; + rpriv->rep = rep; + rep->rep_if[REP_ETH].priv = rpriv; + INIT_LIST_HEAD(&rpriv->vport_sqs_list); + + err = mlx5e_attach_netdev(netdev_priv(netdev)); + if (err) { + pr_warn("Failed to attach representor netdev for vport %d\n", + rep->vport); + goto err_destroy_netdev; + } + + err = mlx5e_rep_neigh_init(rpriv); + if (err) { + pr_warn("Failed to initialized neighbours handling for vport %d\n", + rep->vport); + goto err_detach_netdev; + } + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH); + upriv = netdev_priv(uplink_rpriv->netdev); + err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev, + upriv); + if (err) + goto err_neigh_cleanup; + + err = register_netdev(netdev); + if (err) { + pr_warn("Failed to register representor netdev for vport %d\n", + rep->vport); + goto err_egdev_cleanup; + } + + return 0; + +err_egdev_cleanup: + tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev, + upriv); + +err_neigh_cleanup: + mlx5e_rep_neigh_cleanup(rpriv); + +err_detach_netdev: + mlx5e_detach_netdev(netdev_priv(netdev)); + +err_destroy_netdev: + mlx5e_destroy_netdev(netdev_priv(netdev)); + kfree(rpriv); + return err; +} + +static void +mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *uplink_rpriv; + void *ppriv = priv->ppriv; + struct mlx5e_priv *upriv; + + unregister_netdev(netdev); + uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, + REP_ETH); + upriv = netdev_priv(uplink_rpriv->netdev); + tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev, + upriv); + mlx5e_rep_neigh_cleanup(rpriv); + mlx5e_detach_netdev(priv); + mlx5e_destroy_netdev(priv); + kfree(ppriv); /* mlx5e_rep_priv */ +} + +static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; + + rpriv = mlx5e_rep_to_rep_priv(rep); + + return rpriv->netdev; +} + +static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + + for (vport = 1; vport < total_vfs; vport++) { + struct mlx5_eswitch_rep_if rep_if = {}; + + rep_if.load = mlx5e_vport_rep_load; + rep_if.unload = mlx5e_vport_rep_unload; + rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; + mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH); + } +} + +static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH); +} + +void mlx5e_register_vport_reps(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep_if rep_if; + struct mlx5e_rep_priv *rpriv; + + rpriv = priv->ppriv; + rpriv->netdev = priv->netdev; + + rep_if.load = mlx5e_nic_rep_load; + rep_if.unload = mlx5e_nic_rep_unload; + rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; + rep_if.priv = rpriv; + INIT_LIST_HEAD(&rpriv->vport_sqs_list); + mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/ + + mlx5e_rep_register_vf_vports(priv); /* VFs vports */ +} + +void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */ + mlx5_eswitch_unregister_vport_rep(esw, 0, REP_ETH); /* UPLINK PF*/ +} + +void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv; + + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return NULL; + + rpriv->rep = &esw->offloads.vport_reps[0]; + return rpriv; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h new file mode 100644 index 000000000..844d32d5c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_REP_H__ +#define __MLX5E_REP_H__ + +#include <net/ip_tunnels.h> +#include <linux/rhashtable.h> +#include "eswitch.h" +#include "en.h" + +#ifdef CONFIG_MLX5_ESWITCH +struct mlx5e_neigh_update_table { + struct rhashtable neigh_ht; + /* Save the neigh hash entries in a list in addition to the hash table + * (neigh_ht). In order to iterate easily over the neigh entries. + * Used for stats query. + */ + struct list_head neigh_list; + /* protect lookup/remove operations */ + spinlock_t encap_lock; + struct notifier_block netevent_nb; + struct delayed_work neigh_stats_work; + unsigned long min_interval; /* jiffies */ +}; + +struct mlx5e_rep_priv { + struct mlx5_eswitch_rep *rep; + struct mlx5e_neigh_update_table neigh_update; + struct net_device *netdev; + struct mlx5_flow_handle *vport_rx_rule; + struct list_head vport_sqs_list; + struct rhashtable tc_ht; /* valid for uplink rep */ +}; + +static inline +struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep) +{ + return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv; +} + +struct mlx5e_neigh { + struct net_device *dev; + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; + int family; +}; + +struct mlx5e_neigh_hash_entry { + struct rhash_head rhash_node; + struct mlx5e_neigh m_neigh; + + /* Save the neigh hash entry in a list on the representor in + * addition to the hash table. In order to iterate easily over the + * neighbour entries. Used for stats query. + */ + struct list_head neigh_list; + + /* encap list sharing the same neigh */ + struct list_head encap_list; + + /* valid only when the neigh reference is taken during + * neigh_update_work workqueue callback. + */ + struct neighbour *n; + struct work_struct neigh_update_work; + + /* neigh hash entry can be deleted only when the refcount is zero. + * refcount is needed to avoid neigh hash entry removal by TC, while + * it's used by the neigh notification call. + */ + refcount_t refcnt; + + /* Save the last reported time offloaded trafic pass over one of the + * neigh hash entry flows. Use it to periodically update the neigh + * 'used' value and avoid neigh deleting by the kernel. + */ + unsigned long reported_lastuse; +}; + +enum { + /* set when the encap entry is successfully offloaded into HW */ + MLX5_ENCAP_ENTRY_VALID = BIT(0), +}; + +struct mlx5e_encap_entry { + /* neigh hash entry list of encaps sharing the same neigh */ + struct list_head encap_list; + struct mlx5e_neigh m_neigh; + /* a node of the eswitch encap hash table which keeping all the encap + * entries + */ + struct hlist_node encap_hlist; + struct list_head flows; + u32 encap_id; + struct ip_tunnel_info tun_info; + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + + struct net_device *out_dev; + int tunnel_type; + u8 flags; + char *encap_header; + int encap_size; +}; + +struct mlx5e_rep_sq { + struct mlx5_flow_handle *send_to_vport_rule; + struct list_head list; +}; + +void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev); +void mlx5e_register_vport_reps(struct mlx5e_priv *priv); +void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv); +bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); + +int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp); +bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id); + +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); +#else /* CONFIG_MLX5_ESWITCH */ +static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {} +static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {} +static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } +static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {} +#endif + +#endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c new file mode 100644 index 000000000..9d86e49a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -0,0 +1,1470 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/prefetch.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <net/busy_poll.h> +#include <net/ip6_checksum.h> +#include <net/page_pool.h> +#include <net/inet_ecn.h> +#include "en.h" +#include "en_tc.h" +#include "eswitch.h" +#include "en_rep.h" +#include "ipoib/ipoib.h" +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/tls_rxtx.h" +#include "lib/clock.h" +#include "en/xdp.h" + +static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) +{ + return config->rx_filter == HWTSTAMP_FILTER_ALL; +} + +static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc, + void *data) +{ + u32 ci = mlx5_cqwq_ctr2ix(&cq->wq, cqcc); + + memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64)); +} + +static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, u32 cqcc) +{ + mlx5e_read_cqe_slot(cq, cqcc, &cq->title); + cq->decmprs_left = be32_to_cpu(cq->title.byte_cnt); + cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter); + rq->stats->cqe_compress_blks++; +} + +static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc) +{ + mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr); + cq->mini_arr_idx = 0; +} + +static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) +{ + struct mlx5_cqwq *wq = &cq->wq; + + u8 op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1; + u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc); + u32 wq_sz = mlx5_cqwq_get_size(wq); + u32 ci_top = min_t(u32, wq_sz, ci + n); + + for (; ci < ci_top; ci++, n--) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci); + + cqe->op_own = op_own; + } + + if (unlikely(ci == wq_sz)) { + op_own = !op_own; + for (ci = 0; ci < n; ci++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci); + + cqe->op_own = op_own; + } + } +} + +static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, u32 cqcc) +{ + cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt; + cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum; + cq->title.op_own &= 0xf0; + cq->title.op_own |= 0x01 & (cqcc >> cq->wq.fbc.log_sz); + cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter); + + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + cq->decmprs_wqe_counter += + mpwrq_get_cqe_consumed_strides(&cq->title); + else + cq->decmprs_wqe_counter = + mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cq->decmprs_wqe_counter + 1); +} + +static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, u32 cqcc) +{ + mlx5e_decompress_cqe(rq, cq, cqcc); + cq->title.rss_hash_type = 0; + cq->title.rss_hash_result = 0; +} + +static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, + int update_owner_only, + int budget_rem) +{ + u32 cqcc = cq->wq.cc + update_owner_only; + u32 cqe_count; + u32 i; + + cqe_count = min_t(u32, cq->decmprs_left, budget_rem); + + for (i = update_owner_only; i < cqe_count; + i++, cq->mini_arr_idx++, cqcc++) { + if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE) + mlx5e_read_mini_arr_slot(cq, cqcc); + + mlx5e_decompress_cqe_no_hash(rq, cq, cqcc); + rq->handle_rx_cqe(rq, &cq->title); + } + mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc); + cq->wq.cc = cqcc; + cq->decmprs_left -= cqe_count; + rq->stats->cqe_compress_pkts += cqe_count; + + return cqe_count; +} + +static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, + int budget_rem) +{ + mlx5e_read_title_slot(rq, cq, cq->wq.cc); + mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1); + mlx5e_decompress_cqe(rq, cq, cq->wq.cc); + rq->handle_rx_cqe(rq, &cq->title); + cq->mini_arr_idx++; + + return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; +} + +static inline bool mlx5e_page_is_reserved(struct page *page) +{ + return page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id(); +} + +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); + struct mlx5e_rq_stats *stats = rq->stats; + + if (tail_next == cache->head) { + stats->cache_full++; + return false; + } + + if (unlikely(mlx5e_page_is_reserved(dma_info->page))) { + stats->cache_waive++; + return false; + } + + cache->page_cache[cache->tail] = *dma_info; + cache->tail = tail_next; + return true; +} + +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + struct mlx5e_rq_stats *stats = rq->stats; + + if (unlikely(cache->head == cache->tail)) { + stats->cache_empty++; + return false; + } + + if (page_ref_count(cache->page_cache[cache->head].page) != 1) { + stats->cache_busy++; + return false; + } + + *dma_info = cache->page_cache[cache->head]; + cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); + stats->cache_reuse++; + + dma_sync_single_for_device(rq->pdev, dma_info->addr, + PAGE_SIZE, + DMA_FROM_DEVICE); + return true; +} + +static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + if (mlx5e_rx_cache_get(rq, dma_info)) + return 0; + + dma_info->page = page_pool_dev_alloc_pages(rq->page_pool); + if (unlikely(!dma_info->page)) + return -ENOMEM; + + dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0, + PAGE_SIZE, rq->buff.map_dir); + if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { + put_page(dma_info->page); + dma_info->page = NULL; + return -ENOMEM; + } + + return 0; +} + +void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) +{ + dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir); +} + +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle) +{ + if (likely(recycle)) { + if (mlx5e_rx_cache_put(rq, dma_info)) + return; + + mlx5e_page_dma_unmap(rq, dma_info); + page_pool_recycle_direct(rq->page_pool, dma_info->page); + } else { + mlx5e_page_dma_unmap(rq, dma_info); + put_page(dma_info->page); + } +} + +static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *frag) +{ + int err = 0; + + if (!frag->offset) + /* On first frag (offset == 0), replenish page (dma_info actually). + * Other frags that point to the same dma_info (with a different + * offset) should just use the new one without replenishing again + * by themselves. + */ + err = mlx5e_page_alloc_mapped(rq, frag->di); + + return err; +} + +static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *frag, + bool recycle) +{ + if (frag->last_in_page) + mlx5e_page_release(rq, frag->di, recycle); +} + +static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) +{ + return &rq->wqe.frags[ix << rq->wqe.info.log_num_frags]; +} + +static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, + u16 ix) +{ + struct mlx5e_wqe_frag_info *frag = get_frag(rq, ix); + int err; + int i; + + for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) { + err = mlx5e_get_rx_frag(rq, frag); + if (unlikely(err)) + goto free_frags; + + wqe->data[i].addr = cpu_to_be64(frag->di->addr + + frag->offset + rq->buff.headroom); + } + + return 0; + +free_frags: + while (--i >= 0) + mlx5e_put_rx_frag(rq, --frag, true); + + return err; +} + +static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi, + bool recycle) +{ + int i; + + for (i = 0; i < rq->wqe.info.num_frags; i++, wi++) + mlx5e_put_rx_frag(rq, wi, recycle); +} + +void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix); + + mlx5e_free_rx_wqe(rq, wi, false); +} + +static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int err; + int i; + + for (i = 0; i < wqe_bulk; i++) { + struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i); + + err = mlx5e_alloc_rx_wqe(rq, wqe, ix + i); + if (unlikely(err)) + goto free_wqes; + } + + return 0; + +free_wqes: + while (--i >= 0) + mlx5e_dealloc_rx_wqe(rq, ix + i); + + return err; +} + +static inline void +mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, + struct mlx5e_dma_info *di, u32 frag_offset, u32 len, + unsigned int truesize) +{ + dma_sync_single_for_cpu(rq->pdev, + di->addr + frag_offset, + len, DMA_FROM_DEVICE); + page_ref_inc(di->page); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + di->page, frag_offset, len, truesize); +} + +static inline void +mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, + struct mlx5e_dma_info *dma_info, + int offset_from, int offset_to, u32 headlen) +{ + const void *from = page_address(dma_info->page) + offset_from; + /* Aligning len to sizeof(long) optimizes memcpy performance */ + unsigned int len = ALIGN(headlen, sizeof(long)); + + dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data_offset(skb, offset_to, from, len); +} + +static inline void +mlx5e_copy_skb_header_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_dma_info *dma_info, + u32 offset, u32 headlen) +{ + u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); + + mlx5e_copy_skb_header(pdev, skb, dma_info, offset, 0, headlen_pg); + + if (unlikely(offset + headlen > PAGE_SIZE)) { + dma_info++; + mlx5e_copy_skb_header(pdev, skb, dma_info, 0, headlen_pg, + headlen - headlen_pg); + } +} + +static void +mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle) +{ + const bool no_xdp_xmit = + bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); + struct mlx5e_dma_info *dma_info = wi->umr.dma_info; + int i; + + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) + if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) + mlx5e_page_release(rq, &dma_info[i], recycle); +} + +static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + struct mlx5e_rx_wqe_ll *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + + rq->mpwqe.umr_in_progress = false; + + mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); + + /* ensure wqes are visible to device before updating doorbell record */ + dma_wmb(); + + mlx5_wq_ll_update_db_record(wq); +} + +static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq) +{ + return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; +} + +static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq, + struct mlx5_wq_cyc *wq, + u16 pi, u16 nnops) +{ + struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi]; + + edge_wi = wi + nnops; + + /* fill sq frag edge with nops to avoid wqe wrapping two pages */ + for (; wi < edge_wi; wi++) { + wi->opcode = MLX5_OPCODE_NOP; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } +} + +static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; + struct mlx5e_icosq *sq = &rq->channel->icosq; + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_umr_wqe *umr_wqe; + u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1); + u16 pi, contig_wqebbs_room; + int err; + int i; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) { + mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room); + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + } + + umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); + if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2)) + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, + offsetof(struct mlx5e_umr_wqe, inline_mtts)); + + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { + err = mlx5e_page_alloc_mapped(rq, dma_info); + if (unlikely(err)) + goto err_unmap; + umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR); + } + + bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); + wi->consumed_strides = 0; + + rq->mpwqe.umr_in_progress = true; + + umr_wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset); + + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->pc += MLX5E_UMR_WQEBBS; + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &umr_wqe->ctrl); + + return 0; + +err_unmap: + while (--i >= 0) { + dma_info--; + mlx5e_page_release(rq, dma_info, true); + } + rq->stats->buff_alloc_err++; + + return err; +} + +void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; + /* Don't recycle, this function is called on rq/netdev close */ + mlx5e_free_rx_mpwqe(rq, wi, false); +} + +bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + u8 wqe_bulk; + int err; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + return false; + + wqe_bulk = rq->wqe.info.wqe_bulk; + + if (mlx5_wq_cyc_missing(wq) < wqe_bulk) + return false; + + do { + u16 head = mlx5_wq_cyc_get_head(wq); + + err = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk); + if (unlikely(err)) { + rq->stats->buff_alloc_err++; + break; + } + + mlx5_wq_cyc_push_n(wq, wqe_bulk); + } while (mlx5_wq_cyc_missing(wq) >= wqe_bulk); + + /* ensure wqes are visible to device before updating doorbell record */ + dma_wmb(); + + mlx5_wq_cyc_update_db_record(wq); + + return !!err; +} + +static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, + struct mlx5e_icosq *sq, + struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; + + mlx5_cqwq_pop(&cq->wq); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + netdev_WARN_ONCE(cq->channel->netdev, + "Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own); + return; + } + + if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { + mlx5e_post_rx_mpwqe(rq); + return; + } + + if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) + netdev_WARN_ONCE(cq->channel->netdev, + "Bad OPCODE in ICOSQ WQE info: 0x%x\n", icowi->opcode); +} + +static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); + struct mlx5_cqe64 *cqe; + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (likely(!cqe)) + return; + + /* by design, there's only a single cqe */ + mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe); + + mlx5_cqwq_update_db_record(&cq->wq); +} + +bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + return false; + + mlx5e_poll_ico_cq(&rq->channel->icosq.cq, rq); + + if (mlx5_wq_ll_is_full(wq)) + return false; + + if (!rq->mpwqe.umr_in_progress) + mlx5e_alloc_rx_mpwqe(rq, wq->head); + else + rq->stats->congst_umr += mlx5_wq_ll_missing(wq) > 2; + + return false; +} + +static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp) +{ + u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); + u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || + (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); + + tcp->check = 0; + tcp->psh = get_cqe_lro_tcppsh(cqe); + + if (tcp_ack) { + tcp->ack = 1; + tcp->ack_seq = cqe->lro_ack_seq_num; + tcp->window = cqe->lro_tcp_win; + } +} + +static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) +{ + struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct tcphdr *tcp; + int network_depth = 0; + __wsum check; + __be16 proto; + u16 tot_len; + void *ip_p; + + proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); + + tot_len = cqe_bcnt - network_depth; + ip_p = skb->data + network_depth; + + if (proto == htons(ETH_P_IP)) { + struct iphdr *ipv4 = ip_p; + + tcp = ip_p + sizeof(struct iphdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + ipv4->ttl = cqe->lro_min_ttl; + ipv4->tot_len = cpu_to_be16(tot_len); + ipv4->check = 0; + ipv4->check = ip_fast_csum((unsigned char *)ipv4, + ipv4->ihl); + + mlx5e_lro_update_tcp_hdr(cqe, tcp); + check = csum_partial(tcp, tcp->doff * 4, + csum_unfold((__force __sum16)cqe->check_sum)); + /* Almost done, don't forget the pseudo header */ + tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr, + tot_len - sizeof(struct iphdr), + IPPROTO_TCP, check); + } else { + u16 payload_len = tot_len - sizeof(struct ipv6hdr); + struct ipv6hdr *ipv6 = ip_p; + + tcp = ip_p + sizeof(struct ipv6hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + + ipv6->hop_limit = cqe->lro_min_ttl; + ipv6->payload_len = cpu_to_be16(payload_len); + + mlx5e_lro_update_tcp_hdr(cqe, tcp); + check = csum_partial(tcp, tcp->doff * 4, + csum_unfold((__force __sum16)cqe->check_sum)); + /* Almost done, don't forget the pseudo header */ + tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len, + IPPROTO_TCP, check); + } +} + +static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, + struct sk_buff *skb) +{ + u8 cht = cqe->rss_hash_type; + int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 : + (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 : + PKT_HASH_TYPE_NONE; + skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht); +} + +static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth, + __be16 *proto) +{ + *proto = ((struct ethhdr *)skb->data)->h_proto; + *proto = __vlan_get_protocol(skb, *proto, network_depth); + + if (*proto == htons(ETH_P_IP)) + return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr)); + + if (*proto == htons(ETH_P_IPV6)) + return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr)); + + return false; +} + +static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) +{ + int network_depth = 0; + __be16 proto; + void *ip; + int rc; + + if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto))) + return; + + ip = skb->data + network_depth; + rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) : + IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip)); + + rq->stats->ecn_mark += !!rc; +} + +static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) +{ + void *ip_p = skb->data + network_depth; + + return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol : + ((struct ipv6hdr *)ip_p)->nexthdr; +} + +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + +#define MAX_PADDING 8 + +static void +tail_padding_csum_slow(struct sk_buff *skb, int offset, int len, + struct mlx5e_rq_stats *stats) +{ + stats->csum_complete_tail_slow++; + skb->csum = csum_block_add(skb->csum, + skb_checksum(skb, offset, len, 0), + offset); +} + +static void +tail_padding_csum(struct sk_buff *skb, int offset, + struct mlx5e_rq_stats *stats) +{ + u8 tail_padding[MAX_PADDING]; + int len = skb->len - offset; + void *tail; + + if (unlikely(len > MAX_PADDING)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + tail = skb_header_pointer(skb, offset, len, tail_padding); + if (unlikely(!tail)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + stats->csum_complete_tail++; + skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset); +} + +static void +mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) +{ + struct ipv6hdr *ip6; + struct iphdr *ip4; + int pkt_len; + + switch (proto) { + case htons(ETH_P_IP): + ip4 = (struct iphdr *)(skb->data + network_depth); + pkt_len = network_depth + ntohs(ip4->tot_len); + break; + case htons(ETH_P_IPV6): + ip6 = (struct ipv6hdr *)(skb->data + network_depth); + pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len); + break; + default: + return; + } + + if (likely(pkt_len >= skb->len)) + return; + + tail_padding_csum(skb, pkt_len, stats); +} + +static inline void mlx5e_handle_csum(struct net_device *netdev, + struct mlx5_cqe64 *cqe, + struct mlx5e_rq *rq, + struct sk_buff *skb, + bool lro) +{ + struct mlx5e_rq_stats *stats = rq->stats; + int network_depth = 0; + __be16 proto; + + if (unlikely(!(netdev->features & NETIF_F_RXCSUM))) + goto csum_none; + + if (lro) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + stats->csum_unnecessary++; + return; + } + + /* True when explicitly set via priv flag, or XDP prog is loaded */ + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) + goto csum_unnecessary; + + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to be + * CHECKSUM_UNNECESSARY even if they are not padded. + */ + if (short_frame(skb->len)) + goto csum_unnecessary; + + if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { + if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP)) + goto csum_unnecessary; + + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + if (network_depth > ETH_HLEN) + /* CQE csum is calculated from the IP header and does + * not cover VLAN headers (if present). This will add + * the checksum manually. + */ + skb->csum = csum_partial(skb->data + ETH_HLEN, + network_depth - ETH_HLEN, + skb->csum); + + mlx5e_skb_padding_csum(skb, network_depth, proto, stats); + stats->csum_complete++; + return; + } + +csum_unnecessary: + if (likely((cqe->hds_ip_ext & CQE_L3_OK) && + (cqe->hds_ip_ext & CQE_L4_OK))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (cqe_is_tunneled(cqe)) { + skb->csum_level = 1; + skb->encapsulation = 1; + stats->csum_unnecessary_inner++; + return; + } + stats->csum_unnecessary++; + return; + } +csum_none: + skb->ip_summed = CHECKSUM_NONE; + stats->csum_none++; +} + +#define MLX5E_CE_BIT_MASK 0x80 + +static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct mlx5e_rq *rq, + struct sk_buff *skb) +{ + u8 lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; + struct mlx5e_rq_stats *stats = rq->stats; + struct net_device *netdev = rq->netdev; + + skb->mac_len = ETH_HLEN; + +#ifdef CONFIG_MLX5_EN_TLS + mlx5e_tls_handle_rx_skb(netdev, skb, &cqe_bcnt); +#endif + + if (lro_num_seg > 1) { + mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); + skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); + /* Subtract one since we already counted this as one + * "regular" packet in mlx5e_complete_rx_cqe() + */ + stats->packets += lro_num_seg - 1; + stats->lro_packets++; + stats->lro_bytes += cqe_bcnt; + } + + if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp))) + skb_hwtstamps(skb)->hwtstamp = + mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe)); + + skb_record_rx_queue(skb, rq->ix); + + if (likely(netdev->features & NETIF_F_RXHASH)) + mlx5e_skb_set_hash(cqe, skb); + + if (cqe_has_vlan(cqe)) { + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + be16_to_cpu(cqe->vlan_info)); + stats->removed_vlan_packets++; + } + + skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; + + mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); + /* checking CE bit in cqe - MSB in ml_path field */ + if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK)) + mlx5e_enable_ecn(rq, skb); + + skb->protocol = eth_type_trans(skb, netdev); +} + +static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + struct mlx5e_rq_stats *stats = rq->stats; + + stats->packets++; + stats->bytes += cqe_bcnt; + mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); +} + +static inline +struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, + u32 frag_size, u16 headroom, + u32 cqe_bcnt) +{ + struct sk_buff *skb = build_skb(va, frag_size); + + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + skb_reserve(skb, headroom); + skb_put(skb, cqe_bcnt); + + return skb; +} + +struct sk_buff * +mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) +{ + struct mlx5e_dma_info *di = wi->di; + u16 rx_headroom = rq->buff.headroom; + struct sk_buff *skb; + void *va, *data; + bool consumed; + u32 frag_size; + + va = page_address(di->page) + wi->offset; + data = va + rx_headroom; + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); + + dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset, + frag_size, DMA_FROM_DEVICE); + prefetchw(va); /* xdp_frame data area */ + prefetch(data); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats->wqe_err++; + return NULL; + } + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt); + rcu_read_unlock(); + if (consumed) + return NULL; /* page/packet was consumed by XDP */ + + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt); + if (unlikely(!skb)) + return NULL; + + /* queue up for recycling/reuse */ + page_ref_inc(di->page); + + return skb; +} + +struct sk_buff * +mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) +{ + struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; + struct mlx5e_wqe_frag_info *head_wi = wi; + u16 headlen = min_t(u32, MLX5E_RX_MAX_HEAD, cqe_bcnt); + u16 frag_headlen = headlen; + u16 byte_cnt = cqe_bcnt - headlen; + struct sk_buff *skb; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats->wqe_err++; + return NULL; + } + + /* XDP is not supported in this configuration, as incoming packets + * might spread among multiple pages. + */ + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + prefetchw(skb->data); + + while (byte_cnt) { + u16 frag_consumed_bytes = + min_t(u16, frag_info->frag_size - frag_headlen, byte_cnt); + + mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset + frag_headlen, + frag_consumed_bytes, frag_info->frag_stride); + byte_cnt -= frag_consumed_bytes; + frag_headlen = 0; + frag_info++; + wi++; + } + + /* copy header */ + mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, + 0, headlen); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += headlen; + skb->len += headlen; + + return skb; +} + +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + if (!skb) { + /* probably for XDP */ + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + /* do not return page to cache, + * it will be returned on XDP_TX completion. + */ + goto wq_cyc_pop; + } + goto free_wqe; + } + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); + +free_wqe: + mlx5e_free_rx_wqe(rq, wi, true); +wq_cyc_pop: + mlx5_wq_cyc_pop(wq); +} + +#ifdef CONFIG_MLX5_ESWITCH +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct net_device *netdev = rq->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + if (!skb) { + /* probably for XDP */ + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + /* do not return page to cache, + * it will be returned on XDP_TX completion. + */ + goto wq_cyc_pop; + } + goto free_wqe; + } + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (rep->vlan && skb_vlan_tag_present(skb)) + skb_vlan_pop(skb); + + napi_gro_receive(rq->cq.napi, skb); + +free_wqe: + mlx5e_free_rx_wqe(rq, wi, true); +wq_cyc_pop: + mlx5_wq_cyc_pop(wq); +} +#endif + +struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx) +{ + u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); + struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; + u32 frag_offset = head_offset + headlen; + u32 byte_cnt = cqe_bcnt - headlen; + struct mlx5e_dma_info *head_di = di; + struct sk_buff *skb; + + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + prefetchw(skb->data); + + if (unlikely(frag_offset >= PAGE_SIZE)) { + di++; + frag_offset -= PAGE_SIZE; + } + + while (byte_cnt) { + u32 pg_consumed_bytes = + min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); + unsigned int truesize = + ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); + + mlx5e_add_skb_frag(rq, skb, di, frag_offset, + pg_consumed_bytes, truesize); + byte_cnt -= pg_consumed_bytes; + frag_offset = 0; + di++; + } + /* copy header */ + mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, head_di, + head_offset, headlen); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += headlen; + skb->len += headlen; + + return skb; +} + +struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx) +{ + struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; + u16 rx_headroom = rq->buff.headroom; + u32 cqe_bcnt32 = cqe_bcnt; + struct sk_buff *skb; + void *va, *data; + u32 frag_size; + bool consumed; + + /* Check packet size. Note LRO doesn't use linear SKB */ + if (unlikely(cqe_bcnt > rq->hw_mtu)) { + rq->stats->oversize_pkts_sw_drop++; + return NULL; + } + + va = page_address(di->page) + head_offset; + data = va + rx_headroom; + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32); + + dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset, + frag_size, DMA_FROM_DEVICE); + prefetchw(va); /* xdp_frame data area */ + prefetch(data); + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32); + rcu_read_unlock(); + if (consumed) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } + + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32); + if (unlikely(!skb)) + return NULL; + + /* queue up for recycling/reuse */ + page_ref_inc(di->page); + + return skb; +} + +void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; + u32 head_offset = wqe_offset & (PAGE_SIZE - 1); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + struct mlx5e_rx_wqe_ll *wqe; + struct mlx5_wq_ll *wq; + struct sk_buff *skb; + u16 cqe_bcnt; + + wi->consumed_strides += cstrides; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats->wqe_err++; + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + struct mlx5e_rq_stats *stats = rq->stats; + + stats->mpwqe_filler_cqes++; + stats->mpwqe_filler_strides += cstrides; + goto mpwrq_cqe_out; + } + + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + + skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset, + page_idx); + if (!skb) + goto mpwrq_cqe_out; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) + return; + + wq = &rq->mpwqe.wq; + wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); + mlx5e_free_rx_mpwqe(rq, wi, true); + mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} + +int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) +{ + struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); + struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + struct mlx5_cqe64 *cqe; + int work_done = 0; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + return 0; + + if (cq->decmprs_left) { + work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); + if (cq->decmprs_left || work_done >= budget) + goto out; + } + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) { + if (unlikely(work_done)) + goto out; + return 0; + } + + do { + if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { + work_done += + mlx5e_decompress_cqes_start(rq, cq, + budget - work_done); + continue; + } + + mlx5_cqwq_pop(&cq->wq); + + rq->handle_rx_cqe(rq, cqe); + } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + +out: + if (xdpsq->doorbell) { + mlx5e_xmit_xdp_doorbell(xdpsq); + xdpsq->doorbell = false; + } + + if (xdpsq->redirect_flush) { + xdp_do_flush_map(); + xdpsq->redirect_flush = false; + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + return work_done; +} + +#ifdef CONFIG_MLX5_CORE_IPOIB + +#define MLX5_IB_GRH_SGID_OFFSET 8 +#define MLX5_IB_GRH_DGID_OFFSET 24 +#define MLX5_GID_SIZE 16 + +static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + struct mlx5e_rq_stats *stats = rq->stats; + struct hwtstamp_config *tstamp; + struct net_device *netdev; + struct mlx5e_priv *priv; + char *pseudo_header; + u32 flags_rqpn; + u32 qpn; + u8 *dgid; + u8 g; + + qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff; + netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn); + + /* No mapping present, cannot process SKB. This might happen if a child + * interface is going down while having unprocessed CQEs on parent RQ + */ + if (unlikely(!netdev)) { + /* TODO: add drop counters support */ + skb->dev = NULL; + pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn); + return; + } + + priv = mlx5i_epriv(netdev); + tstamp = &priv->tstamp; + + flags_rqpn = be32_to_cpu(cqe->flags_rqpn); + g = (flags_rqpn >> 28) & 3; + dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET; + if ((!g) || dgid[0] != 0xff) + skb->pkt_type = PACKET_HOST; + else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + + /* Drop packets that this interface sent, ie multicast packets + * that the HCA has replicated. + */ + if (g && (qpn == (flags_rqpn & 0xffffff)) && + (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET, + MLX5_GID_SIZE) == 0)) { + skb->dev = NULL; + return; + } + + skb_pull(skb, MLX5_IB_GRH_BYTES); + + skb->protocol = *((__be16 *)(skb->data)); + + if (netdev->features & NETIF_F_RXCSUM) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + stats->csum_complete++; + } else { + skb->ip_summed = CHECKSUM_NONE; + stats->csum_none++; + } + + if (unlikely(mlx5e_rx_hw_stamp(tstamp))) + skb_hwtstamps(skb)->hwtstamp = + mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe)); + + skb_record_rx_queue(skb, rq->ix); + + if (likely(netdev->features & NETIF_F_RXHASH)) + mlx5e_skb_set_hash(cqe, skb); + + /* 20 bytes of ipoib header and 4 for encap existing */ + pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN); + memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN); + skb_reset_mac_header(skb); + skb_pull(skb, MLX5_IPOIB_HARD_LEN); + + skb->dev = netdev; + + stats->packets++; + stats->bytes += cqe_bcnt; +} + +void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + if (!skb) + goto wq_free_wqe; + + mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (unlikely(!skb->dev)) { + dev_kfree_skb_any(skb); + goto wq_free_wqe; + } + napi_gro_receive(rq->cq.napi, skb); + +wq_free_wqe: + mlx5e_free_rx_wqe(rq, wi, true); + mlx5_wq_cyc_pop(wq); +} + +#endif /* CONFIG_MLX5_CORE_IPOIB */ + +#ifdef CONFIG_MLX5_EN_IPSEC + +void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + if (unlikely(!skb)) { + /* a DROP, save the page-reuse checks */ + mlx5e_free_rx_wqe(rq, wi, true); + goto wq_cyc_pop; + } + skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt); + if (unlikely(!skb)) { + mlx5e_free_rx_wqe(rq, wi, true); + goto wq_cyc_pop; + } + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); + + mlx5e_free_rx_wqe(rq, wi, true); +wq_cyc_pop: + mlx5_wq_cyc_pop(wq); +} + +#endif /* CONFIG_MLX5_EN_IPSEC */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c new file mode 100644 index 000000000..5fb088b54 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/prefetch.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <net/udp.h> +#include "en.h" +#include "en/port.h" + +enum { + MLX5E_ST_LINK_STATE, + MLX5E_ST_LINK_SPEED, + MLX5E_ST_HEALTH_INFO, +#ifdef CONFIG_INET + MLX5E_ST_LOOPBACK, +#endif + MLX5E_ST_NUM, +}; + +const char mlx5e_self_tests[MLX5E_ST_NUM][ETH_GSTRING_LEN] = { + "Link Test", + "Speed Test", + "Health Test", +#ifdef CONFIG_INET + "Loopback Test", +#endif +}; + +int mlx5e_self_test_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_self_tests); +} + +static int mlx5e_test_health_info(struct mlx5e_priv *priv) +{ + struct mlx5_core_health *health = &priv->mdev->priv.health; + + return health->sick ? 1 : 0; +} + +static int mlx5e_test_link_state(struct mlx5e_priv *priv) +{ + u8 port_state; + + if (!netif_carrier_ok(priv->netdev)) + return 1; + + port_state = mlx5_query_vport_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0); + return port_state == VPORT_STATE_UP ? 0 : 1; +} + +static int mlx5e_test_link_speed(struct mlx5e_priv *priv) +{ + u32 speed; + + if (!netif_carrier_ok(priv->netdev)) + return 1; + + return mlx5e_port_linkspeed(priv->mdev, &speed); +} + +struct mlx5ehdr { + __be32 version; + __be64 magic; +}; + +#ifdef CONFIG_INET +/* loopback test */ +#define MLX5E_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) +\ + sizeof(struct udphdr) + sizeof(struct mlx5ehdr)) +#define MLX5E_TEST_MAGIC 0x5AEED15C001ULL + +static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) +{ + struct sk_buff *skb = NULL; + struct mlx5ehdr *mlxh; + struct ethhdr *ethh; + struct udphdr *udph; + struct iphdr *iph; + int iplen; + + skb = netdev_alloc_skb(priv->netdev, MLX5E_TEST_PKT_SIZE); + if (!skb) { + netdev_err(priv->netdev, "\tFailed to alloc loopback skb\n"); + return NULL; + } + + prefetchw(skb->data); + skb_reserve(skb, NET_IP_ALIGN); + + /* Reserve for ethernet and IP header */ + ethh = skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + + skb_set_network_header(skb, skb->len); + iph = skb_put(skb, sizeof(struct iphdr)); + + skb_set_transport_header(skb, skb->len); + udph = skb_put(skb, sizeof(struct udphdr)); + + /* Fill ETH header */ + ether_addr_copy(ethh->h_dest, priv->netdev->dev_addr); + eth_zero_addr(ethh->h_source); + ethh->h_proto = htons(ETH_P_IP); + + /* Fill UDP header */ + udph->source = htons(9); + udph->dest = htons(9); /* Discard Protocol */ + udph->len = htons(sizeof(struct mlx5ehdr) + sizeof(struct udphdr)); + udph->check = 0; + + /* Fill IP header */ + iph->ihl = 5; + iph->ttl = 32; + iph->version = 4; + iph->protocol = IPPROTO_UDP; + iplen = sizeof(struct iphdr) + sizeof(struct udphdr) + + sizeof(struct mlx5ehdr); + iph->tot_len = htons(iplen); + iph->frag_off = 0; + iph->saddr = 0; + iph->daddr = 0; + iph->tos = 0; + iph->id = 0; + ip_send_check(iph); + + /* Fill test header and data */ + mlxh = skb_put(skb, sizeof(*mlxh)); + mlxh->version = 0; + mlxh->magic = cpu_to_be64(MLX5E_TEST_MAGIC); + + skb->csum = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + udp4_hwcsum(skb, iph->saddr, iph->daddr); + + skb->protocol = htons(ETH_P_IP); + skb->pkt_type = PACKET_HOST; + skb->dev = priv->netdev; + + return skb; +} + +struct mlx5e_lbt_priv { + struct packet_type pt; + struct completion comp; + bool loopback_ok; + bool local_lb; +}; + +static int +mlx5e_test_loopback_validate(struct sk_buff *skb, + struct net_device *ndev, + struct packet_type *pt, + struct net_device *orig_ndev) +{ + struct mlx5e_lbt_priv *lbtp = pt->af_packet_priv; + struct mlx5ehdr *mlxh; + struct ethhdr *ethh; + struct udphdr *udph; + struct iphdr *iph; + + /* We are only going to peek, no need to clone the SKB */ + if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb)) + goto out; + + ethh = (struct ethhdr *)skb_mac_header(skb); + if (!ether_addr_equal(ethh->h_dest, orig_ndev->dev_addr)) + goto out; + + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_UDP) + goto out; + + /* Don't assume skb_transport_header() was set */ + udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl); + if (udph->dest != htons(9)) + goto out; + + mlxh = (struct mlx5ehdr *)((char *)udph + sizeof(*udph)); + if (mlxh->magic != cpu_to_be64(MLX5E_TEST_MAGIC)) + goto out; /* so close ! */ + + /* bingo */ + lbtp->loopback_ok = true; + complete(&lbtp->comp); +out: + kfree_skb(skb); + return 0; +} + +static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, + struct mlx5e_lbt_priv *lbtp) +{ + int err = 0; + + /* Temporarily enable local_lb */ + err = mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb); + if (err) + return err; + + if (!lbtp->local_lb) { + err = mlx5_nic_vport_update_local_lb(priv->mdev, true); + if (err) + return err; + } + + err = mlx5e_refresh_tirs(priv, true); + if (err) + goto out; + + lbtp->loopback_ok = false; + init_completion(&lbtp->comp); + + lbtp->pt.type = htons(ETH_P_IP); + lbtp->pt.func = mlx5e_test_loopback_validate; + lbtp->pt.dev = priv->netdev; + lbtp->pt.af_packet_priv = lbtp; + dev_add_pack(&lbtp->pt); + + return 0; + +out: + if (!lbtp->local_lb) + mlx5_nic_vport_update_local_lb(priv->mdev, false); + + return err; +} + +static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv, + struct mlx5e_lbt_priv *lbtp) +{ + if (!lbtp->local_lb) + mlx5_nic_vport_update_local_lb(priv->mdev, false); + + dev_remove_pack(&lbtp->pt); + mlx5e_refresh_tirs(priv, false); +} + +#define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200)) +static int mlx5e_test_loopback(struct mlx5e_priv *priv) +{ + struct mlx5e_lbt_priv *lbtp; + struct sk_buff *skb = NULL; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + netdev_err(priv->netdev, + "\tCan't perform loopback test while device is down\n"); + return -ENODEV; + } + + lbtp = kzalloc(sizeof(*lbtp), GFP_KERNEL); + if (!lbtp) + return -ENOMEM; + lbtp->loopback_ok = false; + + err = mlx5e_test_loopback_setup(priv, lbtp); + if (err) + goto out; + + skb = mlx5e_test_get_udp_skb(priv); + if (!skb) { + err = -ENOMEM; + goto cleanup; + } + + skb_set_queue_mapping(skb, 0); + err = dev_queue_xmit(skb); + if (err) { + netdev_err(priv->netdev, + "\tFailed to xmit loopback packet err(%d)\n", + err); + goto cleanup; + } + + wait_for_completion_timeout(&lbtp->comp, MLX5E_LB_VERIFY_TIMEOUT); + err = !lbtp->loopback_ok; + +cleanup: + mlx5e_test_loopback_cleanup(priv, lbtp); +out: + kfree(lbtp); + return err; +} +#endif + +static int (*mlx5e_st_func[MLX5E_ST_NUM])(struct mlx5e_priv *) = { + mlx5e_test_link_state, + mlx5e_test_link_speed, + mlx5e_test_health_info, +#ifdef CONFIG_INET + mlx5e_test_loopback, +#endif +}; + +void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest, + u64 *buf) +{ + struct mlx5e_priv *priv = netdev_priv(ndev); + int i; + + memset(buf, 0, sizeof(u64) * MLX5E_ST_NUM); + + mutex_lock(&priv->state_lock); + netdev_info(ndev, "Self test begin..\n"); + + for (i = 0; i < MLX5E_ST_NUM; i++) { + netdev_info(ndev, "\t[%d] %s start..\n", + i, mlx5e_self_tests[i]); + buf[i] = mlx5e_st_func[i](priv); + netdev_info(ndev, "\t[%d] %s end: result(%lld)\n", + i, mlx5e_self_tests[i], buf[i]); + } + + mutex_unlock(&priv->state_lock); + + for (i = 0; i < MLX5E_ST_NUM; i++) { + if (buf[i]) { + etest->flags |= ETH_TEST_FL_FAILED; + break; + } + } + netdev_info(ndev, "Self test out: status flags(0x%x)\n", + etest->flags); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c new file mode 100644 index 000000000..9a68dee58 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -0,0 +1,1404 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" +#include "en_accel/ipsec.h" +#include "en_accel/tls.h" + +static const struct counter_desc sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) }, + +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) }, +#endif + + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_strides) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) }, +}; + +#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) + +static int mlx5e_grp_sw_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_SW_COUNTERS; +} + +static int mlx5e_grp_sw_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) +{ + int i; + + for (i = 0; i < NUM_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +{ + int i; + + for (i = 0; i < NUM_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, sw_stats_desc, i); + return idx; +} + +void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_sw_stats temp, *s = &temp; + int i; + + memset(s, 0, sizeof(*s)); + + for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) { + struct mlx5e_channel_stats *channel_stats = + &priv->channel_stats[i]; + struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq; + struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq; + struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; + struct mlx5e_ch_stats *ch_stats = &channel_stats->ch; + int j; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + s->rx_lro_packets += rq_stats->lro_packets; + s->rx_lro_bytes += rq_stats->lro_bytes; + s->rx_ecn_mark += rq_stats->ecn_mark; + s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; + s->rx_csum_none += rq_stats->csum_none; + s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_complete_tail += rq_stats->csum_complete_tail; + s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow; + s->rx_csum_unnecessary += rq_stats->csum_unnecessary; + s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; + s->rx_xdp_drop += rq_stats->xdp_drop; + s->rx_xdp_redirect += rq_stats->xdp_redirect; + s->rx_xdp_tx_xmit += xdpsq_stats->xmit; + s->rx_xdp_tx_full += xdpsq_stats->full; + s->rx_xdp_tx_err += xdpsq_stats->err; + s->rx_xdp_tx_cqe += xdpsq_stats->cqes; + s->rx_wqe_err += rq_stats->wqe_err; + s->rx_mpwqe_filler_cqes += rq_stats->mpwqe_filler_cqes; + s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides; + s->rx_oversize_pkts_sw_drop += rq_stats->oversize_pkts_sw_drop; + s->rx_buff_alloc_err += rq_stats->buff_alloc_err; + s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; + s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; + s->rx_page_reuse += rq_stats->page_reuse; + s->rx_cache_reuse += rq_stats->cache_reuse; + s->rx_cache_full += rq_stats->cache_full; + s->rx_cache_empty += rq_stats->cache_empty; + s->rx_cache_busy += rq_stats->cache_busy; + s->rx_cache_waive += rq_stats->cache_waive; + s->rx_congst_umr += rq_stats->congst_umr; + s->ch_events += ch_stats->events; + s->ch_poll += ch_stats->poll; + s->ch_arm += ch_stats->arm; + s->ch_aff_change += ch_stats->aff_change; + s->ch_eq_rearm += ch_stats->eq_rearm; + /* xdp redirect */ + s->tx_xdp_xmit += xdpsq_red_stats->xmit; + s->tx_xdp_full += xdpsq_red_stats->full; + s->tx_xdp_err += xdpsq_red_stats->err; + s->tx_xdp_cqes += xdpsq_red_stats->cqes; + + for (j = 0; j < priv->max_opened_tc; j++) { + struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_tso_packets += sq_stats->tso_packets; + s->tx_tso_bytes += sq_stats->tso_bytes; + s->tx_tso_inner_packets += sq_stats->tso_inner_packets; + s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; + s->tx_added_vlan_packets += sq_stats->added_vlan_packets; + s->tx_nop += sq_stats->nop; + s->tx_queue_stopped += sq_stats->stopped; + s->tx_queue_wake += sq_stats->wake; + s->tx_queue_dropped += sq_stats->dropped; + s->tx_cqe_err += sq_stats->cqe_err; + s->tx_recover += sq_stats->recover; + s->tx_xmit_more += sq_stats->xmit_more; + s->tx_csum_partial_inner += sq_stats->csum_partial_inner; + s->tx_csum_none += sq_stats->csum_none; + s->tx_csum_partial += sq_stats->csum_partial; +#ifdef CONFIG_MLX5_EN_TLS + s->tx_tls_ooo += sq_stats->tls_ooo; + s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; +#endif + s->tx_cqes += sq_stats->cqes; + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } + } + + memcpy(&priv->stats.sw, s, sizeof(*s)); +} + +static const struct counter_desc q_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, +}; + +static const struct counter_desc drop_rq_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_if_down_packets) }, +}; + +#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) +#define NUM_DROP_RQ_COUNTERS ARRAY_SIZE(drop_rq_stats_desc) + +static int mlx5e_grp_q_get_num_stats(struct mlx5e_priv *priv) +{ + int num_stats = 0; + + if (priv->q_counter) + num_stats += NUM_Q_COUNTERS; + + if (priv->drop_rq_q_counter) + num_stats += NUM_DROP_RQ_COUNTERS; + + return num_stats; +} + +static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) +{ + int i; + + for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + q_stats_desc[i].format); + + for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + drop_rq_stats_desc[i].format); + + return idx; +} + +static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +{ + int i; + + for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + q_stats_desc, i); + for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++) + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + drop_rq_stats_desc, i); + return idx; +} + +static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)]; + + if (priv->q_counter && + !mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out, + sizeof(out))) + qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, + out, out_of_buffer); + if (priv->drop_rq_q_counter && + !mlx5_core_query_q_counter(priv->mdev, priv->drop_rq_q_counter, 0, + out, sizeof(out))) + qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out, out, + out_of_buffer); +} + +#define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c) +static const struct counter_desc vnic_env_stats_desc[] = { + { "rx_steer_missed_packets", + VNIC_ENV_OFF(vport_env.nic_receive_steering_discard) }, +}; + +#define NUM_VNIC_ENV_COUNTERS ARRAY_SIZE(vnic_env_stats_desc) + +static int mlx5e_grp_vnic_env_get_num_stats(struct mlx5e_priv *priv) +{ + return MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard) ? + NUM_VNIC_ENV_COUNTERS : 0; +} + +static int mlx5e_grp_vnic_env_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) + return idx; + + for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vnic_env_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_vnic_env_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) + return idx; + + for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_desc, i); + return idx; +} + +static void mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv) +{ + u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out; + int outlen = MLX5_ST_SZ_BYTES(query_vnic_env_out); + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0}; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) + return; + + MLX5_SET(query_vnic_env_in, in, opcode, + MLX5_CMD_OP_QUERY_VNIC_ENV); + MLX5_SET(query_vnic_env_in, in, op_mod, 0); + MLX5_SET(query_vnic_env_in, in, other_vport, 0); + mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); +} + +#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) +static const struct counter_desc vport_stats_desc[] = { + { "rx_vport_unicast_packets", + VPORT_COUNTER_OFF(received_eth_unicast.packets) }, + { "rx_vport_unicast_bytes", + VPORT_COUNTER_OFF(received_eth_unicast.octets) }, + { "tx_vport_unicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) }, + { "tx_vport_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) }, + { "rx_vport_multicast_packets", + VPORT_COUNTER_OFF(received_eth_multicast.packets) }, + { "rx_vport_multicast_bytes", + VPORT_COUNTER_OFF(received_eth_multicast.octets) }, + { "tx_vport_multicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) }, + { "tx_vport_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) }, + { "rx_vport_broadcast_packets", + VPORT_COUNTER_OFF(received_eth_broadcast.packets) }, + { "rx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(received_eth_broadcast.octets) }, + { "tx_vport_broadcast_packets", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, + { "tx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, + { "rx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(received_ib_unicast.packets) }, + { "rx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(received_ib_unicast.octets) }, + { "tx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) }, + { "tx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) }, + { "rx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(received_ib_multicast.packets) }, + { "rx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(received_ib_multicast.octets) }, + { "tx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) }, + { "tx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, +}; + +#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) + +static int mlx5e_grp_vport_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_VPORT_COUNTERS; +} + +static int mlx5e_grp_vport_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_stats_desc, i); + return idx; +} + +static void mlx5e_grp_vport_update_stats(struct mlx5e_priv *priv) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 *out = (u32 *)priv->stats.vport.query_vport_out; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0}; + struct mlx5_core_dev *mdev = priv->mdev; + + MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, other_vport, 0); + mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); +} + +#define PPORT_802_3_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_802_3_stats_desc[] = { + { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) }, + { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) }, + { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, + { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) }, + { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) }, + { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, + { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, + { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, + { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, + { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) }, + { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) }, + { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) }, + { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, + { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, + { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) }, + { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) }, + { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, + { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, +}; + +#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) + +static int mlx5e_grp_802_3_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PPORT_802_3_COUNTERS; +} + +static int mlx5e_grp_802_3_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_802_3_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, + pport_802_3_stats_desc, i); + return idx; +} + +static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->IEEE_802_3_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +#define PPORT_2863_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_2863_stats_desc[] = { + { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) }, + { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) }, + { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) }, +}; + +#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) + +static int mlx5e_grp_2863_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PPORT_2863_COUNTERS; +} + +static int mlx5e_grp_2863_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2863_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, + pport_2863_stats_desc, i); + return idx; +} + +static void mlx5e_grp_2863_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->RFC_2863_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +#define PPORT_2819_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_2819_stats_desc[] = { + { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) }, + { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) }, + { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) }, + { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) }, + { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, + { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, + { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, + { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, + { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, + { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, + { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, + { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, + { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, +}; + +#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) + +static int mlx5e_grp_2819_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PPORT_2819_COUNTERS; +} + +static int mlx5e_grp_2819_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, + pport_2819_stats_desc, i); + return idx; +} + +static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->RFC_2819_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +#define PPORT_PHY_STATISTICAL_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.phys_layer_statistical_cntrs.c##_high) +static const struct counter_desc pport_phy_statistical_stats_desc[] = { + { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, + { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, +}; + +#define NUM_PPORT_PHY_STATISTICAL_COUNTERS ARRAY_SIZE(pport_phy_statistical_stats_desc) + +static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv) +{ + /* "1" for link_down_events special counter */ + return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ? + NUM_PPORT_PHY_STATISTICAL_COUNTERS + 1 : 1; +} + +static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy"); + + if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) + return idx; + + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +{ + int i; + + /* link_down_events_phy has special handling since it is not stored in __be64 format */ + data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, + counter_set.phys_layer_cntrs.link_down_events); + + if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) + return idx; + + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_stats_desc, i); + return idx; +} + +static void mlx5e_grp_phy_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->phy_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + return; + + out = pstats->phy_statistical_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +#define PPORT_ETH_EXT_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_eth_ext_stats_desc[] = { + { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) }, +}; + +#define NUM_PPORT_ETH_EXT_COUNTERS ARRAY_SIZE(pport_eth_ext_stats_desc) + +static int mlx5e_grp_eth_ext_get_num_stats(struct mlx5e_priv *priv) +{ + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + return NUM_PPORT_ETH_EXT_COUNTERS; + + return 0; +} + +static int mlx5e_grp_eth_ext_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_eth_ext_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters, + pport_eth_ext_stats_desc, i); + return idx; +} + +static void mlx5e_grp_eth_ext_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + if (!MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->eth_ext_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +#define PCIE_PERF_OFF(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) +static const struct counter_desc pcie_perf_stats_desc[] = { + { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) }, + { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, +}; + +#define PCIE_PERF_OFF64(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pcie_perf_stats_desc64[] = { + { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) }, +}; + +static const struct counter_desc pcie_perf_stall_stats_desc[] = { + { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) }, + { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) }, + { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) }, + { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) }, +}; + +#define NUM_PCIE_PERF_COUNTERS ARRAY_SIZE(pcie_perf_stats_desc) +#define NUM_PCIE_PERF_COUNTERS64 ARRAY_SIZE(pcie_perf_stats_desc64) +#define NUM_PCIE_PERF_STALL_COUNTERS ARRAY_SIZE(pcie_perf_stall_stats_desc) + +static int mlx5e_grp_pcie_get_num_stats(struct mlx5e_priv *priv) +{ + int num_stats = 0; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + num_stats += NUM_PCIE_PERF_COUNTERS; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + num_stats += NUM_PCIE_PERF_COUNTERS64; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + num_stats += NUM_PCIE_PERF_STALL_COUNTERS; + + return num_stats; +} + +static int mlx5e_grp_pcie_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc[i].format); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc64[i].format); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stall_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc, i); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc64, i); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stall_stats_desc, i); + return idx; +} + +static void mlx5e_grp_pcie_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(mpcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); + void *out; + + if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group)) + return; + + out = pcie_stats->pcie_perf_counters; + MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); +} + +#define PPORT_PER_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { + { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, + { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, + { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, + { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, +}; + +#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS ARRAY_SIZE(pport_per_prio_traffic_stats_desc) + +static int mlx5e_grp_per_prio_traffic_get_num_stats(void) +{ + return NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * NUM_PPORT_PRIO; +} + +static int mlx5e_grp_per_prio_traffic_fill_strings(struct mlx5e_priv *priv, + u8 *data, + int idx) +{ + int i, prio; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_traffic_stats_desc[i].format, prio); + } + + return idx; +} + +static int mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv, + u64 *data, + int idx) +{ + int i, prio; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_traffic_stats_desc, i); + } + + return idx; +} + +static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { + /* %s is "global" or "prio{i}" */ + { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, +}; + +static const struct counter_desc pport_pfc_stall_stats_desc[] = { + { "tx_pause_storm_warning_events ", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) }, + { "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) }, +}; + +#define NUM_PPORT_PER_PRIO_PFC_COUNTERS ARRAY_SIZE(pport_per_prio_pfc_stats_desc) +#define NUM_PPORT_PFC_STALL_COUNTERS(priv) (ARRAY_SIZE(pport_pfc_stall_stats_desc) * \ + MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) * \ + MLX5_CAP_DEBUG((priv)->mdev, stall_detect)) + +static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 pfc_en_tx; + u8 pfc_en_rx; + int err; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx); + + return err ? 0 : pfc_en_tx | pfc_en_rx; +} + +static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 rx_pause; + u32 tx_pause; + int err; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return false; + + err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + + return err ? false : rx_pause | tx_pause; +} + +static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv) +{ + return (mlx5e_query_global_pause_combined(priv) + + hweight8(mlx5e_query_pfc_combined(priv))) * + NUM_PPORT_PER_PRIO_PFC_COUNTERS + + NUM_PPORT_PFC_STALL_COUNTERS(priv); +} + +static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv, + u8 *data, + int idx) +{ + unsigned long pfc_combined; + int i, prio; + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + char pfc_string[ETH_GSTRING_LEN]; + + snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, pfc_string); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, "global"); + } + } + + for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_pfc_stall_stats_desc[i].format); + + return idx; +} + +static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, + u64 *data, + int idx) +{ + unsigned long pfc_combined; + int i, prio; + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_pfc_stats_desc, i); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], + pport_per_prio_pfc_stats_desc, i); + } + } + + for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], + pport_pfc_stall_stats_desc, i); + + return idx; +} + +static int mlx5e_grp_per_prio_get_num_stats(struct mlx5e_priv *priv) +{ + return mlx5e_grp_per_prio_traffic_get_num_stats() + + mlx5e_grp_per_prio_pfc_get_num_stats(priv); +} + +static int mlx5e_grp_per_prio_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + idx = mlx5e_grp_per_prio_traffic_fill_strings(priv, data, idx); + idx = mlx5e_grp_per_prio_pfc_fill_strings(priv, data, idx); + return idx; +} + +static int mlx5e_grp_per_prio_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + idx = mlx5e_grp_per_prio_traffic_fill_stats(priv, data, idx); + idx = mlx5e_grp_per_prio_pfc_fill_stats(priv, data, idx); + return idx; +} + +static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int prio; + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, + MLX5_REG_PPCNT, 0, 0); + } +} + +static const struct counter_desc mlx5e_pme_status_desc[] = { + { "module_unplug", 8 }, +}; + +static const struct counter_desc mlx5e_pme_error_desc[] = { + { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */ + { "module_high_temp", 48 }, /* high temperature */ + { "module_bad_shorted", 56 }, /* bad or shorted cable/module */ +}; + +#define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc) +#define NUM_PME_ERR_STATS ARRAY_SIZE(mlx5e_pme_error_desc) + +static int mlx5e_grp_pme_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PME_STATUS_STATS + NUM_PME_ERR_STATS; +} + +static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PME_STATUS_STATS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format); + + for (i = 0; i < NUM_PME_ERR_STATS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format); + + return idx; +} + +static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + struct mlx5_priv *mlx5_priv = &priv->mdev->priv; + int i; + + for (i = 0; i < NUM_PME_STATUS_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters, + mlx5e_pme_status_desc, i); + + for (i = 0; i < NUM_PME_ERR_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters, + mlx5e_pme_error_desc, i); + + return idx; +} + +static int mlx5e_grp_ipsec_get_num_stats(struct mlx5e_priv *priv) +{ + return mlx5e_ipsec_get_count(priv); +} + +static int mlx5e_grp_ipsec_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + return idx + mlx5e_ipsec_get_strings(priv, + data + idx * ETH_GSTRING_LEN); +} + +static int mlx5e_grp_ipsec_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + return idx + mlx5e_ipsec_get_stats(priv, data + idx); +} + +static void mlx5e_grp_ipsec_update_stats(struct mlx5e_priv *priv) +{ + mlx5e_ipsec_update_stats(priv); +} + +static int mlx5e_grp_tls_get_num_stats(struct mlx5e_priv *priv) +{ + return mlx5e_tls_get_count(priv); +} + +static int mlx5e_grp_tls_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN); +} + +static int mlx5e_grp_tls_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +{ + return idx + mlx5e_tls_get_stats(priv, data + idx); +} + +static const struct counter_desc rq_stats_desc[] = { + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, +}; + +static const struct counter_desc sq_stats_desc[] = { + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, recover) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, +}; + +static const struct counter_desc rq_xdpsq_stats_desc[] = { + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + +static const struct counter_desc xdpsq_stats_desc[] = { + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + +static const struct counter_desc ch_stats_desc[] = { + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) }, +}; + +#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) +#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) +#define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc) +#define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc) +#define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc) + +static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) +{ + int max_nch = priv->profile->max_nch(priv->mdev); + + return (NUM_RQ_STATS * max_nch) + + (NUM_CH_STATS * max_nch) + + (NUM_SQ_STATS * max_nch * priv->max_opened_tc) + + (NUM_RQ_XDPSQ_STATS * max_nch) + + (NUM_XDPSQ_STATS * max_nch); +} + +static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int max_nch = priv->profile->max_nch(priv->mdev); + int i, j, tc; + + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_CH_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ch_stats_desc[j].format, i); + + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_RQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + rq_stats_desc[j].format, i); + for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + rq_xdpsq_stats_desc[j].format, i); + } + + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + sq_stats_desc[j].format, + priv->channel_tc2txq[i][tc]); + + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_XDPSQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xdpsq_stats_desc[j].format, i); + + return idx; +} + +static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int max_nch = priv->profile->max_nch(priv->mdev); + int i, j, tc; + + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_CH_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].ch, + ch_stats_desc, j); + + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_RQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq, + rq_stats_desc, j); + for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq, + rq_xdpsq_stats_desc, j); + } + + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc], + sq_stats_desc, j); + + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_XDPSQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq, + xdpsq_stats_desc, j); + + return idx; +} + +/* The stats groups order is opposite to the update_stats() order calls */ +const struct mlx5e_stats_grp mlx5e_stats_grps[] = { + { + .get_num_stats = mlx5e_grp_sw_get_num_stats, + .fill_strings = mlx5e_grp_sw_fill_strings, + .fill_stats = mlx5e_grp_sw_fill_stats, + .update_stats = mlx5e_grp_sw_update_stats, + }, + { + .get_num_stats = mlx5e_grp_q_get_num_stats, + .fill_strings = mlx5e_grp_q_fill_strings, + .fill_stats = mlx5e_grp_q_fill_stats, + .update_stats_mask = MLX5E_NDO_UPDATE_STATS, + .update_stats = mlx5e_grp_q_update_stats, + }, + { + .get_num_stats = mlx5e_grp_vnic_env_get_num_stats, + .fill_strings = mlx5e_grp_vnic_env_fill_strings, + .fill_stats = mlx5e_grp_vnic_env_fill_stats, + .update_stats = mlx5e_grp_vnic_env_update_stats, + }, + { + .get_num_stats = mlx5e_grp_vport_get_num_stats, + .fill_strings = mlx5e_grp_vport_fill_strings, + .fill_stats = mlx5e_grp_vport_fill_stats, + .update_stats_mask = MLX5E_NDO_UPDATE_STATS, + .update_stats = mlx5e_grp_vport_update_stats, + }, + { + .get_num_stats = mlx5e_grp_802_3_get_num_stats, + .fill_strings = mlx5e_grp_802_3_fill_strings, + .fill_stats = mlx5e_grp_802_3_fill_stats, + .update_stats_mask = MLX5E_NDO_UPDATE_STATS, + .update_stats = mlx5e_grp_802_3_update_stats, + }, + { + .get_num_stats = mlx5e_grp_2863_get_num_stats, + .fill_strings = mlx5e_grp_2863_fill_strings, + .fill_stats = mlx5e_grp_2863_fill_stats, + .update_stats = mlx5e_grp_2863_update_stats, + }, + { + .get_num_stats = mlx5e_grp_2819_get_num_stats, + .fill_strings = mlx5e_grp_2819_fill_strings, + .fill_stats = mlx5e_grp_2819_fill_stats, + .update_stats = mlx5e_grp_2819_update_stats, + }, + { + .get_num_stats = mlx5e_grp_phy_get_num_stats, + .fill_strings = mlx5e_grp_phy_fill_strings, + .fill_stats = mlx5e_grp_phy_fill_stats, + .update_stats = mlx5e_grp_phy_update_stats, + }, + { + .get_num_stats = mlx5e_grp_eth_ext_get_num_stats, + .fill_strings = mlx5e_grp_eth_ext_fill_strings, + .fill_stats = mlx5e_grp_eth_ext_fill_stats, + .update_stats = mlx5e_grp_eth_ext_update_stats, + }, + { + .get_num_stats = mlx5e_grp_pcie_get_num_stats, + .fill_strings = mlx5e_grp_pcie_fill_strings, + .fill_stats = mlx5e_grp_pcie_fill_stats, + .update_stats = mlx5e_grp_pcie_update_stats, + }, + { + .get_num_stats = mlx5e_grp_per_prio_get_num_stats, + .fill_strings = mlx5e_grp_per_prio_fill_strings, + .fill_stats = mlx5e_grp_per_prio_fill_stats, + .update_stats = mlx5e_grp_per_prio_update_stats, + }, + { + .get_num_stats = mlx5e_grp_pme_get_num_stats, + .fill_strings = mlx5e_grp_pme_fill_strings, + .fill_stats = mlx5e_grp_pme_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_ipsec_get_num_stats, + .fill_strings = mlx5e_grp_ipsec_fill_strings, + .fill_stats = mlx5e_grp_ipsec_fill_stats, + .update_stats = mlx5e_grp_ipsec_update_stats, + }, + { + .get_num_stats = mlx5e_grp_tls_get_num_stats, + .fill_strings = mlx5e_grp_tls_fill_strings, + .fill_stats = mlx5e_grp_tls_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_channels_get_num_stats, + .fill_strings = mlx5e_grp_channels_fill_strings, + .fill_stats = mlx5e_grp_channels_fill_stats, + } +}; + +const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h new file mode 100644 index 000000000..3ea8033ed --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_STATS_H__ +#define __MLX5_EN_STATS_H__ + +#define MLX5E_READ_CTR64_CPU(ptr, dsc, i) \ + (*(u64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR64_BE(ptr, dsc, i) \ + be64_to_cpu(*(__be64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \ + (*(u32 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_BE(ptr, dsc, i) \ + be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) + +#define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) +#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld) + +struct counter_desc { + char format[ETH_GSTRING_LEN]; + size_t offset; /* Byte offset */ +}; + +struct mlx5e_sw_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 tx_tso_packets; + u64 tx_tso_bytes; + u64 tx_tso_inner_packets; + u64 tx_tso_inner_bytes; + u64 tx_added_vlan_packets; + u64 tx_nop; + u64 rx_lro_packets; + u64 rx_lro_bytes; + u64 rx_ecn_mark; + u64 rx_removed_vlan_packets; + u64 rx_csum_unnecessary; + u64 rx_csum_none; + u64 rx_csum_complete; + u64 rx_csum_complete_tail; + u64 rx_csum_complete_tail_slow; + u64 rx_csum_unnecessary_inner; + u64 rx_xdp_drop; + u64 rx_xdp_redirect; + u64 rx_xdp_tx_xmit; + u64 rx_xdp_tx_full; + u64 rx_xdp_tx_err; + u64 rx_xdp_tx_cqe; + u64 tx_csum_none; + u64 tx_csum_partial; + u64 tx_csum_partial_inner; + u64 tx_queue_stopped; + u64 tx_queue_dropped; + u64 tx_xmit_more; + u64 tx_recover; + u64 tx_cqes; + u64 tx_queue_wake; + u64 tx_cqe_err; + u64 tx_xdp_xmit; + u64 tx_xdp_full; + u64 tx_xdp_err; + u64 tx_xdp_cqes; + u64 rx_wqe_err; + u64 rx_mpwqe_filler_cqes; + u64 rx_mpwqe_filler_strides; + u64 rx_oversize_pkts_sw_drop; + u64 rx_buff_alloc_err; + u64 rx_cqe_compress_blks; + u64 rx_cqe_compress_pkts; + u64 rx_page_reuse; + u64 rx_cache_reuse; + u64 rx_cache_full; + u64 rx_cache_empty; + u64 rx_cache_busy; + u64 rx_cache_waive; + u64 rx_congst_umr; + u64 ch_events; + u64 ch_poll; + u64 ch_arm; + u64 ch_aff_change; + u64 ch_eq_rearm; + +#ifdef CONFIG_MLX5_EN_TLS + u64 tx_tls_ooo; + u64 tx_tls_resync_bytes; +#endif +}; + +struct mlx5e_qcounter_stats { + u32 rx_out_of_buffer; + u32 rx_if_down_packets; +}; + +struct mlx5e_vnic_env_stats { + __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; +}; + +#define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \ + vstats->query_vport_out, c) + +struct mlx5e_vport_stats { + __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)]; +}; + +#define PPORT_802_3_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +#define PPORT_2863_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +#define PPORT_2819_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +#define PPORT_PHY_STATISTICAL_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \ + counter_set.phys_layer_statistical_cntrs.c##_high) +#define PPORT_PER_PRIO_GET(pstats, prio, c) \ + MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +#define NUM_PPORT_PRIO 8 +#define PPORT_ETH_EXT_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) + +struct mlx5e_pport_stats { + __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2863_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; +}; + +#define PCIE_PERF_GET(pcie_stats, c) \ + MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ + counter_set.pcie_perf_cntrs_grp_data_layout.c) + +#define PCIE_PERF_GET64(pcie_stats, c) \ + MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ + counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) + +struct mlx5e_pcie_stats { + __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; +}; + +struct mlx5e_rq_stats { + u64 packets; + u64 bytes; + u64 csum_complete; + u64 csum_complete_tail; + u64 csum_complete_tail_slow; + u64 csum_unnecessary; + u64 csum_unnecessary_inner; + u64 csum_none; + u64 lro_packets; + u64 lro_bytes; + u64 ecn_mark; + u64 removed_vlan_packets; + u64 xdp_drop; + u64 xdp_redirect; + u64 wqe_err; + u64 mpwqe_filler_cqes; + u64 mpwqe_filler_strides; + u64 oversize_pkts_sw_drop; + u64 buff_alloc_err; + u64 cqe_compress_blks; + u64 cqe_compress_pkts; + u64 page_reuse; + u64 cache_reuse; + u64 cache_full; + u64 cache_empty; + u64 cache_busy; + u64 cache_waive; + u64 congst_umr; +}; + +struct mlx5e_sq_stats { + /* commonly accessed in data path */ + u64 packets; + u64 bytes; + u64 xmit_more; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 csum_partial; + u64 csum_partial_inner; + u64 added_vlan_packets; + u64 nop; +#ifdef CONFIG_MLX5_EN_TLS + u64 tls_ooo; + u64 tls_resync_bytes; +#endif + /* less likely accessed in data path */ + u64 csum_none; + u64 stopped; + u64 dropped; + u64 recover; + /* dirtied @completion */ + u64 cqes ____cacheline_aligned_in_smp; + u64 wake; + u64 cqe_err; +}; + +struct mlx5e_xdpsq_stats { + u64 xmit; + u64 full; + u64 err; + /* dirtied @completion */ + u64 cqes ____cacheline_aligned_in_smp; +}; + +struct mlx5e_ch_stats { + u64 events; + u64 poll; + u64 arm; + u64 aff_change; + u64 eq_rearm; +}; + +struct mlx5e_stats { + struct mlx5e_sw_stats sw; + struct mlx5e_qcounter_stats qcnt; + struct mlx5e_vnic_env_stats vnic; + struct mlx5e_vport_stats vport; + struct mlx5e_pport_stats pport; + struct rtnl_link_stats64 vf_vport; + struct mlx5e_pcie_stats pcie; +}; + +enum { + MLX5E_NDO_UPDATE_STATS = BIT(0x1), +}; + +struct mlx5e_priv; +struct mlx5e_stats_grp { + u16 update_stats_mask; + int (*get_num_stats)(struct mlx5e_priv *priv); + int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx); + int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); + void (*update_stats)(struct mlx5e_priv *priv); +}; + +extern const struct mlx5e_stats_grp mlx5e_stats_grps[]; +extern const int mlx5e_num_stats_grps; + +void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv); + +#endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c new file mode 100644 index 000000000..305085377 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -0,0 +1,3063 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <net/flow_dissector.h> +#include <net/sch_generic.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_skbedit.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/device.h> +#include <linux/rhashtable.h> +#include <net/switchdev.h> +#include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_vlan.h> +#include <net/tc_act/tc_tunnel_key.h> +#include <net/tc_act/tc_pedit.h> +#include <net/tc_act/tc_csum.h> +#include <net/vxlan.h> +#include <net/arp.h> +#include "en.h" +#include "en_rep.h" +#include "en_tc.h" +#include "eswitch.h" +#include "lib/vxlan.h" +#include "fs_core.h" +#include "en/port.h" + +struct mlx5_nic_flow_attr { + u32 action; + u32 flow_tag; + u32 mod_hdr_id; + u32 hairpin_tirn; + u8 match_level; + struct mlx5_flow_table *hairpin_ft; +}; + +#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1) + +enum { + MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS, + MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS, + MLX5E_TC_FLOW_ESWITCH = BIT(MLX5E_TC_FLOW_BASE), + MLX5E_TC_FLOW_NIC = BIT(MLX5E_TC_FLOW_BASE + 1), + MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2), + MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3), + MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4), +}; + +#define MLX5E_TC_MAX_SPLITS 1 + +struct mlx5e_tc_flow { + struct rhash_head node; + struct mlx5e_priv *priv; + u64 cookie; + u8 flags; + struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; + struct list_head encap; /* flows sharing the same encap ID */ + struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ + struct list_head hairpin; /* flows sharing the same hairpin */ + union { + struct mlx5_esw_flow_attr esw_attr[0]; + struct mlx5_nic_flow_attr nic_attr[0]; + }; +}; + +struct mlx5e_tc_flow_parse_attr { + struct ip_tunnel_info tun_info; + struct mlx5_flow_spec spec; + int num_mod_hdr_actions; + int max_mod_hdr_actions; + void *mod_hdr_actions; + int mirred_ifindex; +}; + +enum { + MLX5_HEADER_TYPE_VXLAN = 0x0, + MLX5_HEADER_TYPE_NVGRE = 0x1, +}; + +#define MLX5E_TC_TABLE_NUM_GROUPS 4 +#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) + +struct mlx5e_hairpin { + struct mlx5_hairpin *pair; + + struct mlx5_core_dev *func_mdev; + struct mlx5e_priv *func_priv; + u32 tdn; + u32 tirn; + + int num_channels; + struct mlx5e_rqt indir_rqt; + u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_ttc_table ttc; +}; + +struct mlx5e_hairpin_entry { + /* a node of a hash table which keeps all the hairpin entries */ + struct hlist_node hairpin_hlist; + + /* flows sharing the same hairpin */ + struct list_head flows; + + u16 peer_vhca_id; + u8 prio; + struct mlx5e_hairpin *hp; +}; + +struct mod_hdr_key { + int num_actions; + void *actions; +}; + +struct mlx5e_mod_hdr_entry { + /* a node of a hash table which keeps all the mod_hdr entries */ + struct hlist_node mod_hdr_hlist; + + /* flows sharing the same mod_hdr entry */ + struct list_head flows; + + struct mod_hdr_key key; + + u32 mod_hdr_id; +}; + +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) + +static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key) +{ + return jhash(key->actions, + key->num_actions * MLX5_MH_ACT_SZ, 0); +} + +static inline int cmp_mod_hdr_info(struct mod_hdr_key *a, + struct mod_hdr_key *b) +{ + if (a->num_actions != b->num_actions) + return 1; + + return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ); +} + +static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int num_actions, actions_size, namespace, err; + struct mlx5e_mod_hdr_entry *mh; + struct mod_hdr_key key; + bool found = false; + u32 hash_key; + + num_actions = parse_attr->num_mod_hdr_actions; + actions_size = MLX5_MH_ACT_SZ * num_actions; + + key.actions = parse_attr->mod_hdr_actions; + key.num_actions = num_actions; + + hash_key = hash_mod_hdr_info(&key); + + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + namespace = MLX5_FLOW_NAMESPACE_FDB; + hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh, + mod_hdr_hlist, hash_key) { + if (!cmp_mod_hdr_info(&mh->key, &key)) { + found = true; + break; + } + } + } else { + namespace = MLX5_FLOW_NAMESPACE_KERNEL; + hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh, + mod_hdr_hlist, hash_key) { + if (!cmp_mod_hdr_info(&mh->key, &key)) { + found = true; + break; + } + } + } + + if (found) + goto attach_flow; + + mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL); + if (!mh) + return -ENOMEM; + + mh->key.actions = (void *)mh + sizeof(*mh); + memcpy(mh->key.actions, key.actions, actions_size); + mh->key.num_actions = num_actions; + INIT_LIST_HEAD(&mh->flows); + + err = mlx5_modify_header_alloc(priv->mdev, namespace, + mh->key.num_actions, + mh->key.actions, + &mh->mod_hdr_id); + if (err) + goto out_err; + + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); + else + hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); + +attach_flow: + list_add(&flow->mod_hdr, &mh->flows); + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + flow->esw_attr->mod_hdr_id = mh->mod_hdr_id; + else + flow->nic_attr->mod_hdr_id = mh->mod_hdr_id; + + return 0; + +out_err: + kfree(mh); + return err; +} + +static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct list_head *next = flow->mod_hdr.next; + + list_del(&flow->mod_hdr); + + if (list_empty(next)) { + struct mlx5e_mod_hdr_entry *mh; + + mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows); + + mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id); + hash_del(&mh->mod_hdr_hlist); + kfree(mh); + } +} + +static +struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) +{ + struct net_device *netdev; + struct mlx5e_priv *priv; + + netdev = __dev_get_by_index(net, ifindex); + priv = netdev_priv(netdev); + return priv->mdev; +} + +static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) +{ + u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0}; + void *tirc; + int err; + + err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn); + if (err) + goto alloc_tdn_err; + + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); + MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]); + MLX5_SET(tirc, tirc, transport_domain, hp->tdn); + + err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn); + if (err) + goto create_tir_err; + + return 0; + +create_tir_err: + mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); +alloc_tdn_err: + return err; +} + +static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp) +{ + mlx5_core_destroy_tir(hp->func_mdev, hp->tirn); + mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); +} + +static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) +{ + u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn; + struct mlx5e_priv *priv = hp->func_priv; + int i, ix, sz = MLX5E_INDIR_RQT_SIZE; + + mlx5e_build_default_indir_rqt(indirection_rqt, sz, + hp->num_channels); + + for (i = 0; i < sz; i++) { + ix = i; + if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(i, ilog2(sz)); + ix = indirection_rqt[ix]; + rqn = hp->pair->rqn[ix]; + MLX5_SET(rqtc, rqtc, rq_num[i], rqn); + } +} + +static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) +{ + int inlen, err, sz = MLX5E_INDIR_RQT_SIZE; + struct mlx5e_priv *priv = hp->func_priv; + struct mlx5_core_dev *mdev = priv->mdev; + void *rqtc; + u32 *in; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + MLX5_SET(rqtc, rqtc, rqt_max_size, sz); + + mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); + + err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn); + if (!err) + hp->indir_rqt.enabled = true; + + kvfree(in); + return err; +} + +static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + u32 in[MLX5_ST_SZ_DW(create_tir_in)]; + int tt, i, err; + void *tirc; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in)); + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + + MLX5_SET(tirc, tirc, transport_domain, hp->tdn); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); + + err = mlx5_core_create_tir(hp->func_mdev, in, + MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]); + if (err) { + mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_tirs; + } + } + return 0; + +err_destroy_tirs: + for (i = 0; i < tt; i++) + mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]); + return err; +} + +static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp) +{ + int tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]); +} + +static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, + struct ttc_params *ttc_params) +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; + + memset(ttc_params, 0, sizeof(*ttc_params)); + + ttc_params->any_tt_tirn = hp->tirn; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params->indir_tirn[tt] = hp->indir_tirn[tt]; + + ft_attr->max_fte = MLX5E_NUM_TT; + ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_TC_PRIO; +} + +static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + struct ttc_params ttc_params; + int err; + + err = mlx5e_hairpin_create_indirect_rqt(hp); + if (err) + return err; + + err = mlx5e_hairpin_create_indirect_tirs(hp); + if (err) + goto err_create_indirect_tirs; + + mlx5e_hairpin_set_ttc_params(hp, &ttc_params); + err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc); + if (err) + goto err_create_ttc_table; + + netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n", + hp->num_channels, hp->ttc.ft.t->id); + + return 0; + +err_create_ttc_table: + mlx5e_hairpin_destroy_indirect_tirs(hp); +err_create_indirect_tirs: + mlx5e_destroy_rqt(priv, &hp->indir_rqt); + + return err; +} + +static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + + mlx5e_destroy_ttc_table(priv, &hp->ttc); + mlx5e_hairpin_destroy_indirect_tirs(hp); + mlx5e_destroy_rqt(priv, &hp->indir_rqt); +} + +static struct mlx5e_hairpin * +mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params, + int peer_ifindex) +{ + struct mlx5_core_dev *func_mdev, *peer_mdev; + struct mlx5e_hairpin *hp; + struct mlx5_hairpin *pair; + int err; + + hp = kzalloc(sizeof(*hp), GFP_KERNEL); + if (!hp) + return ERR_PTR(-ENOMEM); + + func_mdev = priv->mdev; + peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + + pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params); + if (IS_ERR(pair)) { + err = PTR_ERR(pair); + goto create_pair_err; + } + hp->pair = pair; + hp->func_mdev = func_mdev; + hp->func_priv = priv; + hp->num_channels = params->num_channels; + + err = mlx5e_hairpin_create_transport(hp); + if (err) + goto create_transport_err; + + if (hp->num_channels > 1) { + err = mlx5e_hairpin_rss_init(hp); + if (err) + goto rss_init_err; + } + + return hp; + +rss_init_err: + mlx5e_hairpin_destroy_transport(hp); +create_transport_err: + mlx5_core_hairpin_destroy(hp->pair); +create_pair_err: + kfree(hp); + return ERR_PTR(err); +} + +static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp) +{ + if (hp->num_channels > 1) + mlx5e_hairpin_rss_cleanup(hp); + mlx5e_hairpin_destroy_transport(hp); + mlx5_core_hairpin_destroy(hp->pair); + kvfree(hp); +} + +static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio) +{ + return (peer_vhca_id << 16 | prio); +} + +static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, + u16 peer_vhca_id, u8 prio) +{ + struct mlx5e_hairpin_entry *hpe; + u32 hash_key = hash_hairpin_info(peer_vhca_id, prio); + + hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe, + hairpin_hlist, hash_key) { + if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) + return hpe; + } + + return NULL; +} + +#define UNKNOWN_MATCH_PRIO 8 + +static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, u8 *match_prio) +{ + void *headers_c, *headers_v; + u8 prio_val, prio_mask = 0; + bool vlan_present; + +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) { + netdev_warn(priv->netdev, + "only PCP trust state supported for hairpin\n"); + return -EOPNOTSUPP; + } +#endif + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + + vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag); + if (vlan_present) { + prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); + prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); + } + + if (!vlan_present || !prio_mask) { + prio_val = UNKNOWN_MATCH_PRIO; + } else if (prio_mask != 0x7) { + netdev_warn(priv->netdev, + "masked priority match not supported for hairpin\n"); + return -EOPNOTSUPP; + } + + *match_prio = prio_val; + return 0; +} + +static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + int peer_ifindex = parse_attr->mirred_ifindex; + struct mlx5_hairpin_params params; + struct mlx5_core_dev *peer_mdev; + struct mlx5e_hairpin_entry *hpe; + struct mlx5e_hairpin *hp; + u64 link_speed64; + u32 link_speed; + u8 match_prio; + u16 peer_id; + int err; + + peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { + netdev_warn(priv->netdev, "hairpin is not supported\n"); + return -EOPNOTSUPP; + } + + peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id); + err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio); + if (err) + return err; + hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); + if (hpe) + goto attach_flow; + + hpe = kzalloc(sizeof(*hpe), GFP_KERNEL); + if (!hpe) + return -ENOMEM; + + INIT_LIST_HEAD(&hpe->flows); + hpe->peer_vhca_id = peer_id; + hpe->prio = match_prio; + + params.log_data_size = 15; + params.log_data_size = min_t(u8, params.log_data_size, + MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); + params.log_data_size = max_t(u8, params.log_data_size, + MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz)); + + params.log_num_packets = params.log_data_size - + MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev); + params.log_num_packets = min_t(u8, params.log_num_packets, + MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets)); + + params.q_counter = priv->q_counter; + /* set hairpin pair per each 50Gbs share of the link */ + mlx5e_port_max_linkspeed(priv->mdev, &link_speed); + link_speed = max_t(u32, link_speed, 50000); + link_speed64 = link_speed; + do_div(link_speed64, 50000); + params.num_channels = link_speed64; + + hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); + if (IS_ERR(hp)) { + err = PTR_ERR(hp); + goto create_hairpin_err; + } + + netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", + hp->tirn, hp->pair->rqn[0], hp->pair->peer_mdev->priv.name, + hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); + + hpe->hp = hp; + hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, + hash_hairpin_info(peer_id, match_prio)); + +attach_flow: + if (hpe->hp->num_channels > 1) { + flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS; + flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; + } else { + flow->nic_attr->hairpin_tirn = hpe->hp->tirn; + } + list_add(&flow->hairpin, &hpe->flows); + + return 0; + +create_hairpin_err: + kfree(hpe); + return err; +} + +static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct list_head *next = flow->hairpin.next; + + list_del(&flow->hairpin); + + /* no more hairpin flows for us, release the hairpin pair */ + if (list_empty(next)) { + struct mlx5e_hairpin_entry *hpe; + + hpe = list_entry(next, struct mlx5e_hairpin_entry, flows); + + netdev_dbg(priv->netdev, "del hairpin: peer %s\n", + hpe->hp->pair->peer_mdev->priv.name); + + mlx5e_hairpin_destroy(hpe->hp); + hash_del(&hpe->hairpin_hlist); + kfree(hpe); + } +} + +static struct mlx5_flow_handle * +mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_nic_flow_attr *attr = flow->nic_attr; + struct mlx5_core_dev *dev = priv->mdev; + struct mlx5_flow_destination dest[2] = {}; + struct mlx5_flow_act flow_act = { + .action = attr->action, + .has_flow_tag = true, + .flow_tag = attr->flow_tag, + .encap_id = 0, + }; + struct mlx5_fc *counter = NULL; + struct mlx5_flow_handle *rule; + bool table_created = false; + int err, dest_ix = 0; + + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { + err = mlx5e_hairpin_flow_add(priv, flow, parse_attr); + if (err) { + rule = ERR_PTR(err); + goto err_add_hairpin_flow; + } + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = attr->hairpin_ft; + } else { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest[dest_ix].tir_num = attr->hairpin_tirn; + } + dest_ix++; + } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = priv->fs.vlan.ft.t; + dest_ix++; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + counter = mlx5_fc_create(dev, true); + if (IS_ERR(counter)) { + rule = ERR_CAST(counter); + goto err_fc_create; + } + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[dest_ix].counter = counter; + dest_ix++; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); + flow_act.modify_id = attr->mod_hdr_id; + kfree(parse_attr->mod_hdr_actions); + if (err) { + rule = ERR_PTR(err); + goto err_create_mod_hdr_id; + } + } + + if (IS_ERR_OR_NULL(priv->fs.tc.t)) { + int tc_grp_size, tc_tbl_size; + u32 max_flow_counter; + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + + tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE); + + tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, + BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); + + priv->fs.tc.t = + mlx5_create_auto_grouped_flow_table(priv->fs.ns, + MLX5E_TC_PRIO, + tc_tbl_size, + MLX5E_TC_TABLE_NUM_GROUPS, + MLX5E_TC_FT_LEVEL, 0); + if (IS_ERR(priv->fs.tc.t)) { + netdev_err(priv->netdev, + "Failed to create tc offload table\n"); + rule = ERR_CAST(priv->fs.tc.t); + goto err_create_ft; + } + + table_created = true; + } + + if (attr->match_level != MLX5_MATCH_NONE) + parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, + &flow_act, dest, dest_ix); + + if (IS_ERR(rule)) + goto err_add_rule; + + return rule; + +err_add_rule: + if (table_created) { + mlx5_destroy_flow_table(priv->fs.tc.t); + priv->fs.tc.t = NULL; + } +err_create_ft: + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5e_detach_mod_hdr(priv, flow); +err_create_mod_hdr_id: + mlx5_fc_destroy(dev, counter); +err_fc_create: + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) + mlx5e_hairpin_flow_del(priv, flow); +err_add_hairpin_flow: + return rule; +} + +static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_nic_flow_attr *attr = flow->nic_attr; + struct mlx5_fc *counter = NULL; + + counter = mlx5_flow_rule_counter(flow->rule[0]); + mlx5_del_flow_rules(flow->rule[0]); + mlx5_fc_destroy(priv->mdev, counter); + + if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) { + mlx5_destroy_flow_table(priv->fs.tc.t); + priv->fs.tc.t = NULL; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5e_detach_mod_hdr(priv, flow); + + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) + mlx5e_hairpin_flow_del(priv, flow); +} + +static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + +static int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct ip_tunnel_info *tun_info, + struct net_device *mirred_dev, + struct net_device **encap_dev, + struct mlx5e_tc_flow *flow); + +static struct mlx5_flow_handle * +mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct net_device *out_dev, *encap_dev = NULL; + struct mlx5_flow_handle *rule = NULL; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *out_priv; + int err; + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + out_dev = __dev_get_by_index(dev_net(priv->netdev), + attr->parse_attr->mirred_ifindex); + err = mlx5e_attach_encap(priv, &parse_attr->tun_info, + out_dev, &encap_dev, flow); + if (err) { + rule = ERR_PTR(err); + if (err != -EAGAIN) + goto err_attach_encap; + } + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + attr->out_rep[attr->out_count] = rpriv->rep; + attr->out_mdev[attr->out_count++] = out_priv->mdev; + } + + err = mlx5_eswitch_add_vlan_action(esw, attr); + if (err) { + rule = ERR_PTR(err); + goto err_add_vlan; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); + kfree(parse_attr->mod_hdr_actions); + if (err) { + rule = ERR_PTR(err); + goto err_mod_hdr; + } + } + + /* we get here if (1) there's no error (rule being null) or when + * (2) there's an encap action and we're on -EAGAIN (no valid neigh) + */ + if (rule != ERR_PTR(-EAGAIN)) { + rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); + if (IS_ERR(rule)) + goto err_add_rule; + + if (attr->mirror_count) { + flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &parse_attr->spec, attr); + if (IS_ERR(flow->rule[1])) + goto err_fwd_rule; + } + } + return rule; + +err_fwd_rule: + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + rule = flow->rule[1]; +err_add_rule: + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5e_detach_mod_hdr(priv, flow); +err_mod_hdr: + mlx5_eswitch_del_vlan_action(esw, attr); +err_add_vlan: + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + mlx5e_detach_encap(priv, flow); +err_attach_encap: + return rule; +} + +static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + if (attr->mirror_count) + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr); + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + } + + mlx5_eswitch_del_vlan_action(esw, attr); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + mlx5e_detach_encap(priv, flow); + kvfree(attr->parse_attr); + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5e_detach_mod_hdr(priv, flow); +} + +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5e_tc_flow *flow; + int err; + + err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, + e->encap_size, e->encap_header, + &e->encap_id); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n", + err); + return; + } + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(priv); + + list_for_each_entry(flow, &e->flows, encap) { + esw_attr = flow->esw_attr; + esw_attr->encap_id = e->encap_id; + flow->rule[0] = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr); + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", + err); + continue; + } + + if (esw_attr->mirror_count) { + flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &esw_attr->parse_attr->spec, esw_attr); + if (IS_ERR(flow->rule[1])) { + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], esw_attr); + err = PTR_ERR(flow->rule[1]); + mlx5_core_warn(priv->mdev, "Failed to update cached mirror flow, %d\n", + err); + continue; + } + } + + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + } +} + +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, &e->flows, encap) { + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + if (attr->mirror_count) + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr); + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + } + } + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_encap_dealloc(priv->mdev, e->encap_id); + } +} + +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh *m_neigh = &nhe->m_neigh; + struct mlx5e_tc_flow *flow; + struct mlx5e_encap_entry *e; + struct mlx5_fc *counter; + struct neigh_table *tbl; + bool neigh_used = false; + struct neighbour *n; + u64 lastuse; + + if (m_neigh->family == AF_INET) + tbl = &arp_tbl; +#if IS_ENABLED(CONFIG_IPV6) + else if (m_neigh->family == AF_INET6) + tbl = &nd_tbl; +#endif + else + return; + + list_for_each_entry(e, &nhe->encap_list, encap_list) { + if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) + continue; + list_for_each_entry(flow, &e->flows, encap) { + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + counter = mlx5_flow_rule_counter(flow->rule[0]); + lastuse = mlx5_fc_query_lastuse(counter); + if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { + neigh_used = true; + break; + } + } + } + if (neigh_used) + break; + } + + if (neigh_used) { + nhe->reported_lastuse = jiffies; + + /* find the relevant neigh according to the cached device and + * dst ip pair + */ + n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev); + if (!n) + return; + + neigh_event_send(n, NULL); + neigh_release(n); + } +} + +static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct list_head *next = flow->encap.next; + + list_del(&flow->encap); + if (list_empty(next)) { + struct mlx5e_encap_entry *e; + + e = list_entry(next, struct mlx5e_encap_entry, flows); + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + mlx5_encap_dealloc(priv->mdev, e->encap_id); + + hash_del_rcu(&e->encap_hlist); + kfree(e->encap_header); + kfree(e); + } +} + +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + mlx5e_tc_del_fdb_flow(priv, flow); + else + mlx5e_tc_del_nic_flow(priv, flow); +} + +static void parse_vxlan_attr(struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->key); + struct flow_dissector_key_keyid *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->mask); + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(key->keyid)); + } +} + +static int parse_tunnel_attr(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + + struct flow_dissector_key_control *enc_control = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + f->key); + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + f->mask); + + /* Full udp dst port must be given */ + if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) + goto vxlan_match_offload_err; + + if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) && + MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) + parse_vxlan_attr(spec, f); + else { + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst)); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(key->dst)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(key->src)); + } else { /* udp dst port must be given */ +vxlan_match_offload_err: + netdev_warn(priv->netdev, + "IP tunnel decap offload supported only for vxlan, must set UDP dport\n"); + return -EOPNOTSUPP; + } + + if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + f->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + f->mask); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(key->dst)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); + } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + f->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + f->mask); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_dissector_key_ip *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IP, + f->key); + struct flow_dissector_key_ip *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IP, + f->mask); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl); + } + + /* Enforce DMAC when offloading incoming tunneled flows. + * Flow counters require a match on the DMAC. + */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), priv->netdev->dev_addr); + + /* let software handle IP fragments */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + + return 0; +} + +static int __parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + u8 *match_level) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + u16 addr_type = 0; + u8 ip_proto = 0; + + *match_level = MLX5_MATCH_NONE; + + if (f->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_CVLAN) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_TCP) | + BIT(FLOW_DISSECTOR_KEY_IP) | + BIT(FLOW_DISSECTOR_KEY_ENC_IP))) { + netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", + f->dissector->used_keys); + return -EOPNOTSUPP; + } + + if ((dissector_uses_key(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) || + dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) && + dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + f->key); + switch (key->addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: + if (parse_tunnel_attr(priv, spec, f)) + return -EOPNOTSUPP; + break; + default: + return -EOPNOTSUPP; + } + + /* In decap flow, header pointers should point to the inner + * headers, outer header were already set by parse_tunnel_attr + */ + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + inner_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + inner_headers); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->key); + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->mask); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, + ntohs(mask->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ntohs(key->n_proto)); + + if (mask->n_proto) + *match_level = MLX5_MATCH_L2; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) { + if (key->vlan_tpid == htons(ETH_P_8021AD)) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + svlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + svlan_tag, 1); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + cvlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + cvlan_tag, 1); + } + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority); + + *match_level = MLX5_MATCH_L2; + } + } else if (*match_level != MLX5_MATCH_NONE) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + *match_level = MLX5_MATCH_L2; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_CVLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_CVLAN, + f->mask); + if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) { + if (key->vlan_tpid == htons(ETH_P_8021AD)) { + MLX5_SET(fte_match_set_misc, misc_c, + outer_second_svlan_tag, 1); + MLX5_SET(fte_match_set_misc, misc_v, + outer_second_svlan_tag, 1); + } else { + MLX5_SET(fte_match_set_misc, misc_c, + outer_second_cvlan_tag, 1); + MLX5_SET(fte_match_set_misc, misc_v, + outer_second_cvlan_tag, 1); + } + + MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid, + mask->vlan_id); + MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid, + key->vlan_id); + MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio, + mask->vlan_priority); + MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio, + key->vlan_priority); + + *match_level = MLX5_MATCH_L2; + } + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_dissector_key_eth_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->key); + struct flow_dissector_key_eth_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->mask); + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dmac_47_16), + mask->dst); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), + key->dst); + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + smac_47_16), + mask->src); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + smac_47_16), + key->src); + + if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst)) + *match_level = MLX5_MATCH_L2; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_CONTROL, + f->key); + + struct flow_dissector_key_control *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_CONTROL, + f->mask); + addr_type = key->addr_type; + + /* the HW doesn't support frag first/later */ + if (mask->flags & FLOW_DIS_FIRST_FRAG) + return -EOPNOTSUPP; + + if (mask->flags & FLOW_DIS_IS_FRAGMENT) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, + key->flags & FLOW_DIS_IS_FRAGMENT); + + /* the HW doesn't need L3 inline to match on frag=no */ + if (!(key->flags & FLOW_DIS_IS_FRAGMENT)) + *match_level = MLX5_MATCH_L2; + /* *** L2 attributes parsing up to here *** */ + else + *match_level = MLX5_MATCH_L3; + } + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->key); + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->mask); + ip_proto = key->ip_proto; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + mask->ip_proto); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + key->ip_proto); + + if (mask->ip_proto) + *match_level = MLX5_MATCH_L3; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->mask); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &mask->src, sizeof(mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &key->src, sizeof(key->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &mask->dst, sizeof(mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &key->dst, sizeof(key->dst)); + + if (mask->src || mask->dst) + *match_level = MLX5_MATCH_L3; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->mask); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &mask->src, sizeof(mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &key->src, sizeof(key->src)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &mask->dst, sizeof(mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &key->dst, sizeof(key->dst)); + + if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY || + ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY) + *match_level = MLX5_MATCH_L3; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) { + struct flow_dissector_key_ip *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->key); + struct flow_dissector_key_ip *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->mask); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl); + + if (mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_ipv4_ttl)) + return -EOPNOTSUPP; + + if (mask->tos || mask->ttl) + *match_level = MLX5_MATCH_L3; + } + + /* *** L3 attributes parsing up to here *** */ + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->mask); + switch (ip_proto) { + case IPPROTO_TCP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_sport, ntohs(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_sport, ntohs(key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_dport, ntohs(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_dport, ntohs(key->dst)); + break; + + case IPPROTO_UDP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(key->dst)); + break; + default: + netdev_err(priv->netdev, + "Only UDP and TCP transport are supported\n"); + return -EINVAL; + } + + if (mask->src || mask->dst) + *match_level = MLX5_MATCH_L4; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_dissector_key_tcp *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_TCP, + f->key); + struct flow_dissector_key_tcp *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_TCP, + f->mask); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(mask->flags)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, + ntohs(key->flags)); + + if (mask->flags) + *match_level = MLX5_MATCH_L4; + } + + return 0; +} + +static int parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + struct mlx5_core_dev *dev = priv->mdev; + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; + u8 match_level; + int err; + + err = __parse_cls_flower(priv, spec, f, &match_level); + + if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { + rep = rpriv->rep; + if (rep->vport != FDB_UPLINK_VPORT && + (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && + esw->offloads.inline_mode < match_level)) { + netdev_warn(priv->netdev, + "Flow is not offloaded due to min inline setting, required %d actual %d\n", + match_level, esw->offloads.inline_mode); + return -EOPNOTSUPP; + } + } + + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + flow->esw_attr->match_level = match_level; + else + flow->nic_attr->match_level = match_level; + + return err; +} + +struct pedit_headers { + struct ethhdr eth; + struct iphdr ip4; + struct ipv6hdr ip6; + struct tcphdr tcp; + struct udphdr udp; +}; + +static int pedit_header_offsets[] = { + [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), + [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), + [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), + [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), + [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), +}; + +#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) + +static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, + struct pedit_headers *masks, + struct pedit_headers *vals) +{ + u32 *curr_pmask, *curr_pval; + + if (hdr_type >= __PEDIT_HDR_TYPE_MAX) + goto out_err; + + curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset); + curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset); + + if (*curr_pmask & mask) /* disallow acting twice on the same location */ + goto out_err; + + *curr_pmask |= mask; + *curr_pval |= (val & mask); + + return 0; + +out_err: + return -EOPNOTSUPP; +} + +struct mlx5_fields { + u8 field; + u8 size; + u32 offset; +}; + +#define OFFLOAD(fw_field, size, field, off) \ + {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)} + +static struct mlx5_fields fields[] = { + OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0), + OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0), + OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0), + OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0), + OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0), + + OFFLOAD(IP_TTL, 1, ip4.ttl, 0), + OFFLOAD(SIPV4, 4, ip4.saddr, 0), + OFFLOAD(DIPV4, 4, ip4.daddr, 0), + + OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0), + OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0), + OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0), + OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0), + OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0), + OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0), + OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0), + OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0), + OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0), + + OFFLOAD(TCP_SPORT, 2, tcp.source, 0), + OFFLOAD(TCP_DPORT, 2, tcp.dest, 0), + OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5), + + OFFLOAD(UDP_SPORT, 2, udp.source, 0), + OFFLOAD(UDP_DPORT, 2, udp.dest, 0), +}; + +/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at + * max from the SW pedit action. On success, attr->num_mod_hdr_actions + * says how many HW actions were actually parsed. + */ +static int offload_pedit_fields(struct pedit_headers *masks, + struct pedit_headers *vals, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; + int i, action_size, nactions, max_actions, first, last, next_z; + void *s_masks_p, *a_masks_p, *vals_p; + struct mlx5_fields *f; + u8 cmd, field_bsize; + u32 s_mask, a_mask; + unsigned long mask; + __be32 mask_be32; + __be16 mask_be16; + void *action; + + set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET]; + add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD]; + set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET]; + add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD]; + + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + action = parse_attr->mod_hdr_actions + + parse_attr->num_mod_hdr_actions * action_size; + + max_actions = parse_attr->max_mod_hdr_actions; + nactions = parse_attr->num_mod_hdr_actions; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + /* avoid seeing bits set from previous iterations */ + s_mask = 0; + a_mask = 0; + + s_masks_p = (void *)set_masks + f->offset; + a_masks_p = (void *)add_masks + f->offset; + + memcpy(&s_mask, s_masks_p, f->size); + memcpy(&a_mask, a_masks_p, f->size); + + if (!s_mask && !a_mask) /* nothing to offload here */ + continue; + + if (s_mask && a_mask) { + printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field); + return -EOPNOTSUPP; + } + + if (nactions == max_actions) { + printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions); + return -EOPNOTSUPP; + } + + if (s_mask) { + cmd = MLX5_ACTION_TYPE_SET; + mask = s_mask; + vals_p = (void *)set_vals + f->offset; + /* clear to denote we consumed this field */ + memset(s_masks_p, 0, f->size); + } else { + cmd = MLX5_ACTION_TYPE_ADD; + mask = a_mask; + vals_p = (void *)add_vals + f->offset; + /* clear to denote we consumed this field */ + memset(a_masks_p, 0, f->size); + } + + field_bsize = f->size * BITS_PER_BYTE; + + if (field_bsize == 32) { + mask_be32 = *(__be32 *)&mask; + mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); + } else if (field_bsize == 16) { + mask_be16 = *(__be16 *)&mask; + mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); + } + + first = find_first_bit(&mask, field_bsize); + next_z = find_next_zero_bit(&mask, field_bsize, first); + last = find_last_bit(&mask, field_bsize); + if (first < next_z && next_z < last) { + printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", + mask); + return -EOPNOTSUPP; + } + + MLX5_SET(set_action_in, action, action_type, cmd); + MLX5_SET(set_action_in, action, field, f->field); + + if (cmd == MLX5_ACTION_TYPE_SET) { + MLX5_SET(set_action_in, action, offset, first); + /* length is num of bits to be written, zero means length of 32 */ + MLX5_SET(set_action_in, action, length, (last - first + 1)); + } + + if (field_bsize == 32) + MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first); + else if (field_bsize == 16) + MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first); + else if (field_bsize == 8) + MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); + + action += action_size; + nactions++; + } + + parse_attr->num_mod_hdr_actions = nactions; + return 0; +} + +static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, + const struct tc_action *a, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + int nkeys, action_size, max_actions; + + nkeys = tcf_pedit_nkeys(a); + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + + if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ + max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions); + else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ + max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions); + + /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */ + max_actions = min(max_actions, nkeys * 16); + + parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL); + if (!parse_attr->mod_hdr_actions) + return -ENOMEM; + + parse_attr->max_mod_hdr_actions = max_actions; + return 0; +} + +static const struct pedit_headers zero_masks = {}; + +static int parse_tc_pedit_action(struct mlx5e_priv *priv, + const struct tc_action *a, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks; + int nkeys, i, err = -EOPNOTSUPP; + u32 mask, val, offset; + u8 cmd, htype; + + nkeys = tcf_pedit_nkeys(a); + + memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); + memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); + + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + cmd = tcf_pedit_cmd(a, i); + err = -EOPNOTSUPP; /* can't be all optimistic */ + + if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) { + netdev_warn(priv->netdev, "legacy pedit isn't offloaded\n"); + goto out_err; + } + + if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) { + netdev_warn(priv->netdev, "pedit cmd %d isn't offloaded\n", cmd); + goto out_err; + } + + mask = tcf_pedit_mask(a, i); + val = tcf_pedit_val(a, i); + offset = tcf_pedit_offset(a, i); + + err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]); + if (err) + goto out_err; + } + + if (!parse_attr->mod_hdr_actions) { + err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); + if (err) + goto out_err; + } + + err = offload_pedit_fields(masks, vals, parse_attr); + if (err < 0) + goto out_dealloc_parsed_actions; + + for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { + cmd_masks = &masks[cmd]; + if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { + netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd); + print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, + 16, 1, cmd_masks, sizeof(zero_masks), true); + err = -EOPNOTSUPP; + goto out_dealloc_parsed_actions; + } + } + + return 0; + +out_dealloc_parsed_actions: + kfree(parse_attr->mod_hdr_actions); +out_err: + return err; +} + +static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags) +{ + u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP | + TCA_CSUM_UPDATE_FLAG_UDP; + + /* The HW recalcs checksums only if re-writing headers */ + if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) { + netdev_warn(priv->netdev, + "TC csum action is only offloaded with pedit\n"); + return false; + } + + if (update_flags & ~prot_flags) { + netdev_warn(priv->netdev, + "can't offload TC csum action for some header/s - flags %#x\n", + update_flags); + return false; + } + + return true; +} + +static bool modify_header_match_supported(struct mlx5_flow_spec *spec, + struct tcf_exts *exts) +{ + const struct tc_action *a; + bool modify_ip_header; + LIST_HEAD(actions); + u8 htype, ip_proto; + void *headers_v; + u16 ethertype; + int nkeys, i; + + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); + + /* for non-IP we only re-write MACs, so we're okay */ + if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) + goto out_ok; + + modify_ip_header = false; + tcf_exts_for_each_action(i, a, exts) { + int k; + + if (!is_tcf_pedit(a)) + continue; + + nkeys = tcf_pedit_nkeys(a); + for (k = 0; k < nkeys; k++) { + htype = tcf_pedit_htype(a, k); + if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || + htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { + modify_ip_header = true; + break; + } + } + } + + ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); + if (modify_ip_header && ip_proto != IPPROTO_TCP && + ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) { + pr_info("can't offload re-write of ip proto %d\n", ip_proto); + return false; + } + +out_ok: + return true; +} + +static bool actions_match_supported(struct mlx5e_priv *priv, + struct tcf_exts *exts, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) +{ + u32 actions; + + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + actions = flow->esw_attr->action; + else + actions = flow->nic_attr->action; + + if (flow->flags & MLX5E_TC_FLOW_EGRESS && + !(actions & MLX5_FLOW_CONTEXT_ACTION_DECAP)) + return false; + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + return modify_header_match_supported(&parse_attr->spec, exts); + + return true; +} + +static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) +{ + struct mlx5_core_dev *fmdev, *pmdev; + u64 fsystem_guid, psystem_guid; + + fmdev = priv->mdev; + pmdev = peer_priv->mdev; + + mlx5_query_nic_vport_system_image_guid(fmdev, &fsystem_guid); + mlx5_query_nic_vport_system_image_guid(pmdev, &psystem_guid); + + return (fsystem_guid == psystem_guid); +} + +static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_nic_flow_attr *attr = flow->nic_attr; + const struct tc_action *a; + LIST_HEAD(actions); + u32 action = 0; + int err, i; + + if (!tcf_exts_has_actions(exts)) + return -EINVAL; + + attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + + tcf_exts_for_each_action(i, a, exts) { + if (is_tcf_gact_shot(a)) { + action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + if (MLX5_CAP_FLOWTABLE(priv->mdev, + flow_table_properties_nic_receive.flow_counter)) + action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + continue; + } + + if (is_tcf_pedit(a)) { + err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr); + if (err) + return err; + + action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + continue; + } + + if (is_tcf_csum(a)) { + if (csum_offload_supported(priv, action, + tcf_csum_update_flags(a))) + continue; + + return -EOPNOTSUPP; + } + + if (is_tcf_mirred_egress_redirect(a)) { + struct net_device *peer_dev = tcf_mirred_dev(a); + + if (priv->netdev->netdev_ops == peer_dev->netdev_ops && + same_hw_devs(priv, netdev_priv(peer_dev))) { + parse_attr->mirred_ifindex = peer_dev->ifindex; + flow->flags |= MLX5E_TC_FLOW_HAIRPIN; + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + } else { + netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n", + peer_dev->name); + return -EINVAL; + } + continue; + } + + if (is_tcf_skbedit_mark(a)) { + u32 mark = tcf_skbedit_mark(a); + + if (mark & ~MLX5E_TC_FLOW_ID_MASK) { + netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n", + mark); + return -EINVAL; + } + + attr->flow_tag = mark; + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + continue; + } + + return -EINVAL; + } + + attr->action = action; + if (!actions_match_supported(priv, exts, parse_attr, flow)) + return -EOPNOTSUPP; + + return 0; +} + +static inline int cmp_encap_info(struct ip_tunnel_key *a, + struct ip_tunnel_key *b) +{ + return memcmp(a, b, sizeof(*a)); +} + +static inline int hash_encap_info(struct ip_tunnel_key *key) +{ + return jhash(key, sizeof(*key), 0); +} + +static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct flowi4 *fl4, + struct neighbour **out_n, + u8 *out_ttl) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *uplink_rpriv; + struct rtable *rt; + struct neighbour *n = NULL; + +#if IS_ENABLED(CONFIG_INET) + int ret; + + rt = ip_route_output_key(dev_net(mirred_dev), fl4); + ret = PTR_ERR_OR_ZERO(rt); + if (ret) + return ret; +#else + return -EOPNOTSUPP; +#endif + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + /* if the egress device isn't on the same HW e-switch, we use the uplink */ + if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) + *out_dev = uplink_rpriv->netdev; + else + *out_dev = rt->dst.dev; + + if (!(*out_ttl)) + *out_ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &fl4->daddr); + ip_rt_put(rt); + if (!n) + return -ENOMEM; + + *out_n = n; + return 0; +} + +static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, + struct net_device *peer_netdev) +{ + struct mlx5e_priv *peer_priv; + + peer_priv = netdev_priv(peer_netdev); + + return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && + (priv->netdev->netdev_ops == peer_netdev->netdev_ops) && + same_hw_devs(priv, peer_priv) && + MLX5_VPORT_MANAGER(peer_priv->mdev) && + (peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS)); +} + +static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct flowi6 *fl6, + struct neighbour **out_n, + u8 *out_ttl) +{ + struct neighbour *n = NULL; + struct dst_entry *dst; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6, + NULL); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + if (!(*out_ttl)) + *out_ttl = ip6_dst_hoplimit(dst); + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + /* if the egress device isn't on the same HW e-switch, we use the uplink */ + if (!switchdev_port_same_parent_id(priv->netdev, dst->dev)) + *out_dev = uplink_rpriv->netdev; + else + *out_dev = dst->dev; +#else + return -EOPNOTSUPP; +#endif + + n = dst_neigh_lookup(dst, &fl6->daddr); + dst_release(dst); + if (!n) + return -ENOMEM; + + *out_n = n; + return 0; +} + +static void gen_vxlan_header_ipv4(struct net_device *out_dev, + char buf[], int encap_size, + unsigned char h_dest[ETH_ALEN], + u8 tos, u8 ttl, + __be32 daddr, + __be32 saddr, + __be16 udp_dst_port, + __be32 vx_vni) +{ + struct ethhdr *eth = (struct ethhdr *)buf; + struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); + struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr)); + struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + + memset(buf, 0, encap_size); + + ether_addr_copy(eth->h_dest, h_dest); + ether_addr_copy(eth->h_source, out_dev->dev_addr); + eth->h_proto = htons(ETH_P_IP); + + ip->daddr = daddr; + ip->saddr = saddr; + + ip->tos = tos; + ip->ttl = ttl; + ip->protocol = IPPROTO_UDP; + ip->version = 0x4; + ip->ihl = 0x5; + + udp->dest = udp_dst_port; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(vx_vni); +} + +static void gen_vxlan_header_ipv6(struct net_device *out_dev, + char buf[], int encap_size, + unsigned char h_dest[ETH_ALEN], + u8 tos, u8 ttl, + struct in6_addr *daddr, + struct in6_addr *saddr, + __be16 udp_dst_port, + __be32 vx_vni) +{ + struct ethhdr *eth = (struct ethhdr *)buf; + struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); + struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr)); + struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + + memset(buf, 0, encap_size); + + ether_addr_copy(eth->h_dest, h_dest); + ether_addr_copy(eth->h_source, out_dev->dev_addr); + eth->h_proto = htons(ETH_P_IPV6); + + ip6_flow_hdr(ip6h, tos, 0); + /* the HW fills up ipv6 payload len */ + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = ttl; + ip6h->daddr = *daddr; + ip6h->saddr = *saddr; + + udp->dest = udp_dst_port; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(vx_vni); +} + +static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN; + struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev; + struct neighbour *n = NULL; + struct flowi4 fl4 = {}; + u8 nud_state, tos, ttl; + char *encap_header; + int err; + + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + return -EOPNOTSUPP; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + fl4.flowi4_proto = IPPROTO_UDP; + fl4.fl4_dport = tun_key->tp_dst; + break; + default: + err = -EOPNOTSUPP; + goto free_encap; + } + + tos = tun_key->tos; + ttl = tun_key->ttl; + + fl4.flowi4_tos = tun_key->tos; + fl4.daddr = tun_key->u.ipv4.dst; + fl4.saddr = tun_key->u.ipv4.src; + + err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, + &fl4, &n, &ttl); + if (err) + goto free_encap; + + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's importent to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) + goto free_encap; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + gen_vxlan_header_ipv4(out_dev, encap_header, + ipv4_encap_size, e->h_dest, tos, ttl, + fl4.daddr, + fl4.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); + break; + default: + err = -EOPNOTSUPP; + goto destroy_neigh_entry; + } + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + err = -EAGAIN; + goto out; + } + + err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, + ipv4_encap_size, encap_header, &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +out: + if (n) + neigh_release(n); + return err; +} + +static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN; + struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev = NULL; + struct neighbour *n = NULL; + struct flowi6 fl6 = {}; + u8 nud_state, tos, ttl; + char *encap_header; + int err; + + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + return -EOPNOTSUPP; + } + + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + fl6.flowi6_proto = IPPROTO_UDP; + fl6.fl6_dport = tun_key->tp_dst; + break; + default: + err = -EOPNOTSUPP; + goto free_encap; + } + + tos = tun_key->tos; + ttl = tun_key->ttl; + + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + fl6.daddr = tun_key->u.ipv6.dst; + fl6.saddr = tun_key->u.ipv6.src; + + err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, + &fl6, &n, &ttl); + if (err) + goto free_encap; + + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's importent to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) + goto free_encap; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + gen_vxlan_header_ipv6(out_dev, encap_header, + ipv6_encap_size, e->h_dest, tos, ttl, + &fl6.daddr, + &fl6.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); + break; + default: + err = -EOPNOTSUPP; + goto destroy_neigh_entry; + } + + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + err = -EAGAIN; + goto out; + } + + err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, + ipv6_encap_size, encap_header, &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +out: + if (n) + neigh_release(n); + return err; +} + +static int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct ip_tunnel_info *tun_info, + struct net_device *mirred_dev, + struct net_device **encap_dev, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + unsigned short family = ip_tunnel_info_af(tun_info); + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct ip_tunnel_key *key = &tun_info->key; + struct mlx5e_encap_entry *e; + int tunnel_type, err = 0; + uintptr_t hash_key; + bool found = false; + + /* udp dst port must be set */ + if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst))) + goto vxlan_encap_offload_err; + + /* setting udp src port isn't supported */ + if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) { +vxlan_encap_offload_err: + netdev_warn(priv->netdev, + "must set udp dst port and not set udp src port\n"); + return -EOPNOTSUPP; + } + + if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) && + MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { + tunnel_type = MLX5_HEADER_TYPE_VXLAN; + } else { + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst)); + return -EOPNOTSUPP; + } + + hash_key = hash_encap_info(key); + + hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, + encap_hlist, hash_key) { + if (!cmp_encap_info(&e->tun_info.key, key)) { + found = true; + break; + } + } + + /* must verify if encap is valid or not */ + if (found) + goto attach_flow; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->tun_info = *tun_info; + e->tunnel_type = tunnel_type; + INIT_LIST_HEAD(&e->flows); + + if (family == AF_INET) + err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e); + else if (family == AF_INET6) + err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e); + + if (err && err != -EAGAIN) + goto out_err; + + hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + +attach_flow: + list_add(&flow->encap, &e->flows); + *encap_dev = e->out_dev; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + attr->encap_id = e->encap_id; + else + err = -EAGAIN; + + return err; + +out_err: + kfree(e); + return err; +} + +static int parse_tc_vlan_action(struct mlx5e_priv *priv, + const struct tc_action *a, + struct mlx5_esw_flow_attr *attr, + u32 *action) +{ + u8 vlan_idx = attr->total_vlan; + + if (vlan_idx >= MLX5_FS_VLAN_DEPTH) + return -EOPNOTSUPP; + + if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { + if (vlan_idx) { + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, + MLX5_FS_VLAN_DEPTH)) + return -EOPNOTSUPP; + + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2; + } else { + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + } + } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { + attr->vlan_vid[vlan_idx] = tcf_vlan_push_vid(a); + attr->vlan_prio[vlan_idx] = tcf_vlan_push_prio(a); + attr->vlan_proto[vlan_idx] = tcf_vlan_push_proto(a); + if (!attr->vlan_proto[vlan_idx]) + attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); + + if (vlan_idx) { + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, + MLX5_FS_VLAN_DEPTH)) + return -EOPNOTSUPP; + + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + } else { + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && + (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) || + tcf_vlan_push_prio(a))) + return -EOPNOTSUPP; + + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + } + } else { /* action is TCA_VLAN_ACT_MODIFY */ + return -EOPNOTSUPP; + } + + attr->total_vlan = vlan_idx + 1; + + return 0; +} + +static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct ip_tunnel_info *info = NULL; + const struct tc_action *a; + LIST_HEAD(actions); + bool encap = false; + u32 action = 0; + int err, i; + + if (!tcf_exts_has_actions(exts)) + return -EINVAL; + + attr->in_rep = rpriv->rep; + attr->in_mdev = priv->mdev; + + tcf_exts_for_each_action(i, a, exts) { + if (is_tcf_gact_shot(a)) { + action |= MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + continue; + } + + if (is_tcf_pedit(a)) { + err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB, + parse_attr); + if (err) + return err; + + action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + attr->mirror_count = attr->out_count; + continue; + } + + if (is_tcf_csum(a)) { + if (csum_offload_supported(priv, action, + tcf_csum_update_flags(a))) + continue; + + return -EOPNOTSUPP; + } + + if (is_tcf_mirred_egress_redirect(a) || is_tcf_mirred_egress_mirror(a)) { + struct mlx5e_priv *out_priv; + struct net_device *out_dev; + + out_dev = tcf_mirred_dev(a); + + if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { + pr_err("can't support more than %d output ports, can't offload forwarding\n", + attr->out_count); + return -EOPNOTSUPP; + } + + if (switchdev_port_same_parent_id(priv->netdev, + out_dev) || + is_merged_eswitch_dev(priv, out_dev)) { + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + out_priv = netdev_priv(out_dev); + rpriv = out_priv->ppriv; + attr->out_rep[attr->out_count] = rpriv->rep; + attr->out_mdev[attr->out_count++] = out_priv->mdev; + } else if (encap) { + parse_attr->mirred_ifindex = out_dev->ifindex; + parse_attr->tun_info = *info; + attr->parse_attr = parse_attr; + action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + /* attr->out_rep is resolved when we handle encap */ + } else { + pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EINVAL; + } + continue; + } + + if (is_tcf_tunnel_set(a)) { + info = tcf_tunnel_info(a); + if (info) + encap = true; + else + return -EOPNOTSUPP; + attr->mirror_count = attr->out_count; + continue; + } + + if (is_tcf_vlan(a)) { + err = parse_tc_vlan_action(priv, a, attr, &action); + + if (err) + return err; + + attr->mirror_count = attr->out_count; + continue; + } + + if (is_tcf_tunnel_release(a)) { + action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + continue; + } + + return -EINVAL; + } + + attr->action = action; + if (!actions_match_supported(priv, exts, parse_attr, flow)) + return -EOPNOTSUPP; + + if (attr->out_count > 1 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { + netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static void get_flags(int flags, u8 *flow_flags) +{ + u8 __flow_flags = 0; + + if (flags & MLX5E_TC_INGRESS) + __flow_flags |= MLX5E_TC_FLOW_INGRESS; + if (flags & MLX5E_TC_EGRESS) + __flow_flags |= MLX5E_TC_FLOW_EGRESS; + + *flow_flags = __flow_flags; +} + +static const struct rhashtable_params tc_ht_params = { + .head_offset = offsetof(struct mlx5e_tc_flow, node), + .key_offset = offsetof(struct mlx5e_tc_flow, cookie), + .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie), + .automatic_shrinking = true, +}; + +static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *uplink_rpriv; + + if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + return &uplink_rpriv->tc_ht; + } else + return &priv->fs.tc.ht; +} + +int mlx5e_configure_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, int flags) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct rhashtable *tc_ht = get_tc_ht(priv); + struct mlx5e_tc_flow *flow; + int attr_size, err = 0; + u8 flow_flags = 0; + + get_flags(flags, &flow_flags); + + flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + if (flow) { + netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie); + return 0; + } + + if (esw && esw->mode == SRIOV_OFFLOADS) { + flow_flags |= MLX5E_TC_FLOW_ESWITCH; + attr_size = sizeof(struct mlx5_esw_flow_attr); + } else { + flow_flags |= MLX5E_TC_FLOW_NIC; + attr_size = sizeof(struct mlx5_nic_flow_attr); + } + + flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); + parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); + if (!parse_attr || !flow) { + err = -ENOMEM; + goto err_free; + } + + flow->cookie = f->cookie; + flow->flags = flow_flags; + flow->priv = priv; + + err = parse_cls_flower(priv, flow, &parse_attr->spec, f); + if (err < 0) + goto err_free; + + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow); + if (err < 0) + goto err_free; + flow->rule[0] = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); + } else { + err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow); + if (err < 0) + goto err_free; + flow->rule[0] = mlx5e_tc_add_nic_flow(priv, parse_attr, flow); + } + + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); + if (err != -EAGAIN) + goto err_free; + } + + if (err != -EAGAIN) + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + + if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || + !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)) + kvfree(parse_attr); + + err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); + if (err) { + mlx5e_tc_del_flow(priv, flow); + kfree(flow); + } + + return err; + +err_free: + kvfree(parse_attr); + kfree(flow); + return err; +} + +#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS) +#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS) + +static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) +{ + if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK)) + return true; + + return false; +} + +int mlx5e_delete_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, int flags) +{ + struct rhashtable *tc_ht = get_tc_ht(priv); + struct mlx5e_tc_flow *flow; + + flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + if (!flow || !same_flow_direction(flow, flags)) + return -EINVAL; + + rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params); + + mlx5e_tc_del_flow(priv, flow); + + kfree(flow); + + return 0; +} + +int mlx5e_stats_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, int flags) +{ + struct rhashtable *tc_ht = get_tc_ht(priv); + struct mlx5e_tc_flow *flow; + struct mlx5_fc *counter; + u64 bytes; + u64 packets; + u64 lastuse; + + flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + if (!flow || !same_flow_direction(flow, flags)) + return -EINVAL; + + if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED)) + return 0; + + counter = mlx5_flow_rule_counter(flow->rule[0]); + if (!counter) + return 0; + + mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + + tcf_exts_stats_update(f->exts, bytes, packets, lastuse); + + return 0; +} + +static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, + struct mlx5e_priv *peer_priv) +{ + struct mlx5_core_dev *peer_mdev = peer_priv->mdev; + struct mlx5e_hairpin_entry *hpe; + u16 peer_vhca_id; + int bkt; + + if (!same_hw_devs(priv, peer_priv)) + return; + + peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); + + hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) { + if (hpe->peer_vhca_id == peer_vhca_id) + hpe->hp->pair->peer_gone = true; + } +} + +static int mlx5e_tc_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_flow_steering *fs; + struct mlx5e_priv *peer_priv; + struct mlx5e_tc_table *tc; + struct mlx5e_priv *priv; + + if (ndev->netdev_ops != &mlx5e_netdev_ops || + event != NETDEV_UNREGISTER || + ndev->reg_state == NETREG_REGISTERED) + return NOTIFY_DONE; + + tc = container_of(this, struct mlx5e_tc_table, netdevice_nb); + fs = container_of(tc, struct mlx5e_flow_steering, tc); + priv = container_of(fs, struct mlx5e_priv, fs); + peer_priv = netdev_priv(ndev); + if (priv == peer_priv || + !(priv->netdev->features & NETIF_F_HW_TC)) + return NOTIFY_DONE; + + mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv); + + return NOTIFY_DONE; +} + +int mlx5e_tc_nic_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = &priv->fs.tc; + int err; + + hash_init(tc->mod_hdr_tbl); + hash_init(tc->hairpin_tbl); + + err = rhashtable_init(&tc->ht, &tc_ht_params); + if (err) + return err; + + tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; + if (register_netdevice_notifier(&tc->netdevice_nb)) { + tc->netdevice_nb.notifier_call = NULL; + mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); + } + + return err; +} + +static void _mlx5e_tc_del_flow(void *ptr, void *arg) +{ + struct mlx5e_tc_flow *flow = ptr; + struct mlx5e_priv *priv = flow->priv; + + mlx5e_tc_del_flow(priv, flow); + kfree(flow); +} + +void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = &priv->fs.tc; + + if (tc->netdevice_nb.notifier_call) + unregister_netdevice_notifier(&tc->netdevice_nb); + + rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); + + if (!IS_ERR_OR_NULL(tc->t)) { + mlx5_destroy_flow_table(tc->t); + tc->t = NULL; + } +} + +int mlx5e_tc_esw_init(struct rhashtable *tc_ht) +{ + return rhashtable_init(tc_ht, &tc_ht_params); +} + +void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) +{ + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); +} + +int mlx5e_tc_num_filters(struct mlx5e_priv *priv) +{ + struct rhashtable *tc_ht = get_tc_ht(priv); + + return atomic_read(&tc_ht->nelems); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h new file mode 100644 index 000000000..49436bf3b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_EN_TC_H__ +#define __MLX5_EN_TC_H__ + +#include <net/pkt_cls.h> + +#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff + +#ifdef CONFIG_MLX5_ESWITCH + +enum { + MLX5E_TC_INGRESS = BIT(0), + MLX5E_TC_EGRESS = BIT(1), + MLX5E_TC_LAST_EXPORTED_BIT = 1, +}; + +int mlx5e_tc_nic_init(struct mlx5e_priv *priv); +void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); + +int mlx5e_tc_esw_init(struct rhashtable *tc_ht); +void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); + +int mlx5e_configure_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, int flags); +int mlx5e_delete_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, int flags); + +int mlx5e_stats_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, int flags); + +struct mlx5e_encap_entry; +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +struct mlx5e_neigh_hash_entry; +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); + +int mlx5e_tc_num_filters(struct mlx5e_priv *priv); + +#else /* CONFIG_MLX5_ESWITCH */ +static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} +static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; } +#endif + +#endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c new file mode 100644 index 000000000..52d3989bb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -0,0 +1,730 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/tcp.h> +#include <linux/if_vlan.h> +#include <net/dsfield.h> +#include "en.h" +#include "ipoib/ipoib.h" +#include "en_accel/en_accel.h" +#include "lib/clock.h" + +#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS + +#ifndef CONFIG_MLX5_EN_TLS +#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ + MLX5E_SQ_NOPS_ROOM) +#else +/* TLS offload requires MLX5E_SQ_STOP_ROOM to have + * enough room for a resync SKB, a normal SKB and a NOP + */ +#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\ + MLX5E_SQ_NOPS_ROOM) +#endif + +static inline void mlx5e_tx_dma_unmap(struct device *pdev, + struct mlx5e_sq_dma *dma) +{ + switch (dma->type) { + case MLX5E_DMA_MAP_SINGLE: + dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + case MLX5E_DMA_MAP_PAGE: + dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + default: + WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); + } +} + +static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) +{ + return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; +} + +static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq, + dma_addr_t addr, + u32 size, + enum mlx5e_dma_map_type map_type) +{ + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); + + dma->addr = addr; + dma->size = size; + dma->type = map_type; +} + +static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) +{ + int i; + + for (i = 0; i < num_dma; i++) { + struct mlx5e_sq_dma *last_pushed_dma = + mlx5e_dma_get(sq, --sq->dma_fifo_pc); + + mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma); + } +} + +#ifdef CONFIG_MLX5_CORE_EN_DCB +static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ + int dscp_cp = 0; + + if (skb->protocol == htons(ETH_P_IP)) + dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + else if (skb->protocol == htons(ETH_P_IPV6)) + dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; + + return priv->dcbx_dp.dscp2prio[dscp_cp]; +} +#endif + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int channel_ix = fallback(dev, skb, NULL); + u16 num_channels; + int up = 0; + + if (!netdev_get_num_tc(dev)) + return channel_ix; + +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP) + up = mlx5e_get_dscp_up(priv, skb); + else +#endif + if (skb_vlan_tag_present(skb)) + up = skb->vlan_tci >> VLAN_PRIO_SHIFT; + + /* channel_ix can be larger than num_channels since + * dev->num_real_tx_queues = num_channels * num_tc + */ + num_channels = priv->channels.params.num_channels; + if (channel_ix >= num_channels) + channel_ix = reciprocal_scale(channel_ix, num_channels); + + return priv->channel_tc2txq[channel_ix][up]; +} + +static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) +{ +#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) + + return max(skb_network_offset(skb), MLX5E_MIN_INLINE); +} + +static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb) +{ + struct flow_keys keys; + + if (skb_transport_header_was_set(skb)) + return skb_transport_offset(skb); + else if (skb_flow_dissect_flow_keys(skb, &keys, 0)) + return keys.control.thoff; + else + return mlx5e_skb_l2_header_offset(skb); +} + +static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, + struct sk_buff *skb) +{ + u16 hlen; + + switch (mode) { + case MLX5_INLINE_MODE_NONE: + return 0; + case MLX5_INLINE_MODE_TCP_UDP: + hlen = eth_get_headlen(skb->data, skb_headlen(skb)); + if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) + hlen += VLAN_HLEN; + break; + case MLX5_INLINE_MODE_IP: + /* When transport header is set to zero, it means no transport + * header. When transport header is set to 0xff's, it means + * transport header wasn't set. + */ + if (skb_transport_offset(skb)) { + hlen = mlx5e_skb_l3_header_offset(skb); + break; + } + /* fall through */ + case MLX5_INLINE_MODE_L2: + default: + hlen = mlx5e_skb_l2_header_offset(skb); + } + return min_t(u16, hlen, skb_headlen(skb)); +} + +static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) +{ + struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; + int cpy1_sz = 2 * ETH_ALEN; + int cpy2_sz = ihs - cpy1_sz; + + memcpy(vhdr, skb->data, cpy1_sz); + vhdr->h_vlan_proto = skb->vlan_proto; + vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); + memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); +} + +static inline void +mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) +{ + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; + if (skb->encapsulation) { + eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM | + MLX5_ETH_WQE_L4_INNER_CSUM; + sq->stats->csum_partial_inner++; + } else { + eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; + sq->stats->csum_partial++; + } + } else + sq->stats->csum_none++; +} + +static inline u16 +mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb) +{ + struct mlx5e_sq_stats *stats = sq->stats; + u16 ihs; + + if (skb->encapsulation) { + ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); + stats->tso_inner_packets++; + stats->tso_inner_bytes += skb->len - ihs; + } else { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + ihs = skb_transport_offset(skb) + sizeof(struct udphdr); + else + ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); + stats->tso_packets++; + stats->tso_bytes += skb->len - ihs; + } + + return ihs; +} + +static inline int +mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, + unsigned char *skb_data, u16 headlen, + struct mlx5_wqe_data_seg *dseg) +{ + dma_addr_t dma_addr = 0; + u8 num_dma = 0; + int i; + + if (headlen) { + dma_addr = dma_map_single(sq->pdev, skb_data, headlen, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + goto dma_unmap_wqe_err; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(headlen); + + mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE); + num_dma++; + dseg++; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + int fsz = skb_frag_size(frag); + + dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + goto dma_unmap_wqe_err; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(fsz); + + mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + num_dma++; + dseg++; + } + + return num_dma; + +dma_unmap_wqe_err: + mlx5e_dma_unmap_wqe_err(sq, num_dma); + return -ENOMEM; +} + +static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, + struct mlx5_wq_cyc *wq, + u16 pi, u16 nnops) +{ + struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; + + edge_wi = wi + nnops; + + /* fill sq frag edge with nops to avoid wqe wrapping two pages */ + for (; wi < edge_wi; wi++) { + wi->skb = NULL; + wi->num_wqebbs = 1; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + sq->stats->nop += nnops; +} + +static inline void +mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, + u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma, + struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + + wi->num_bytes = num_bytes; + wi->num_dma = num_dma; + wi->num_wqebbs = num_wqebbs; + wi->skb = skb; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + + netdev_tx_sent_queue(sq->txq, num_bytes); + + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + + sq->pc += wi->num_wqebbs; + if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) { + netif_tx_stop_queue(sq->txq); + sq->stats->stopped++; + } + + if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); +} + +#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) + +netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_tx_wqe *wqe, u16 pi) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe_info *wi; + + struct mlx5e_sq_stats *stats = sq->stats; + u16 headlen, ihs, contig_wqebbs_room; + u16 ds_cnt, ds_cnt_inl = 0; + u8 num_wqebbs, opcode; + u32 num_bytes; + int num_dma; + __be16 mss; + + /* Calc ihs and ds cnt, no writes to wqe yet */ + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + if (skb_is_gso(skb)) { + opcode = MLX5_OPCODE_LSO; + mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + ihs = mlx5e_tx_get_gso_ihs(sq, skb); + num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; + stats->packets += skb_shinfo(skb)->gso_segs; + } else { + opcode = MLX5_OPCODE_SEND; + mss = 0; + ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + stats->packets++; + } + + stats->bytes += num_bytes; + stats->xmit_more += skb->xmit_more; + + headlen = skb->len - ihs - skb->data_len; + ds_cnt += !!headlen; + ds_cnt += skb_shinfo(skb)->nr_frags; + + if (ihs) { + ihs += !!skb_vlan_tag_present(skb) * VLAN_HLEN; + + ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + } + + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) { +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5_wqe_eth_seg cur_eth = wqe->eth; +#endif + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); + mlx5e_sq_fetch_wqe(sq, &wqe, &pi); +#ifdef CONFIG_MLX5_EN_IPSEC + wqe->eth = cur_eth; +#endif + } + + /* fill wqe */ + wi = &sq->db.wqe_info[pi]; + cseg = &wqe->ctrl; + eseg = &wqe->eth; + dseg = wqe->data; + + mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + + eseg->mss = mss; + + if (ihs) { + eseg->inline_hdr.sz = cpu_to_be16(ihs); + if (skb_vlan_tag_present(skb)) { + ihs -= VLAN_HLEN; + mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs); + stats->added_vlan_packets++; + } else { + memcpy(eseg->inline_hdr.start, skb->data, ihs); + } + dseg += ds_cnt_inl; + } else if (skb_vlan_tag_present(skb)) { + eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); + if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) + eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN); + eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); + stats->added_vlan_packets++; + } + + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg); + if (unlikely(num_dma < 0)) + goto err_drop; + + mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes, + num_dma, wi, cseg); + + return NETDEV_TX_OK; + +err_drop: + stats->dropped++; + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; +} + +netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_tx_wqe *wqe; + struct mlx5e_txqsq *sq; + u16 pi; + + sq = priv->txq2sq[skb_get_queue_mapping(skb)]; + mlx5e_sq_fetch_wqe(sq, &wqe, &pi); + + /* might send skbs and update wqe and pi */ + skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi); + if (unlikely(!skb)) + return NETDEV_TX_OK; + + return mlx5e_sq_xmit(sq, skb, wqe, pi); +} + +static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq, + struct mlx5_err_cqe *err_cqe) +{ + struct mlx5_cqwq *wq = &sq->cq.wq; + u32 ci; + + ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1); + + netdev_err(sq->channel->netdev, + "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", + sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome, + err_cqe->vendor_err_synd); + mlx5_dump_err_cqe(sq->cq.mdev, err_cqe); +} + +bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) +{ + struct mlx5e_sq_stats *stats; + struct mlx5e_txqsq *sq; + struct mlx5_cqe64 *cqe; + u32 dma_fifo_cc; + u32 nbytes; + u16 npkts; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_txqsq, cq); + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return false; + + stats = sq->stats; + + npkts = 0; + nbytes = 0; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + /* avoid dirtying sq cache line every cqe */ + dma_fifo_cc = sq->dma_fifo_cc; + + i = 0; + do { + u16 wqe_counter; + bool last_wqe; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) { + if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, + &sq->state)) { + mlx5e_dump_error_cqe(sq, + (struct mlx5_err_cqe *)cqe); + queue_work(cq->channel->priv->wq, + &sq->recover.recover_work); + } + stats->cqe_err++; + } + + do { + struct mlx5e_tx_wqe_info *wi; + struct sk_buff *skb; + u16 ci; + int j; + + last_wqe = (sqcc == wqe_counter); + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + skb = wi->skb; + + if (unlikely(!skb)) { /* nop */ + sqcc++; + continue; + } + + if (unlikely(skb_shinfo(skb)->tx_flags & + SKBTX_HW_TSTAMP)) { + struct skb_shared_hwtstamps hwts = {}; + + hwts.hwtstamp = + mlx5_timecounter_cyc2time(sq->clock, + get_cqe_ts(cqe)); + skb_tstamp_tx(skb, &hwts); + } + + for (j = 0; j < wi->num_dma; j++) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, dma_fifo_cc++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } + + npkts++; + nbytes += wi->num_bytes; + sqcc += wi->num_wqebbs; + napi_consume_skb(skb, napi_budget); + } while (!last_wqe); + + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + + stats->cqes += i; + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->dma_fifo_cc = dma_fifo_cc; + sq->cc = sqcc; + + netdev_tx_completed_queue(sq->txq, npkts, nbytes); + + if (netif_tx_queue_stopped(sq->txq) && + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + MLX5E_SQ_STOP_ROOM) && + !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { + netif_tx_wake_queue(sq->txq); + stats->wake++; + } + + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + +void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + u32 nbytes = 0; + u16 ci, npkts = 0; + struct sk_buff *skb; + int i; + + while (sq->cc != sq->pc) { + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); + wi = &sq->db.wqe_info[ci]; + skb = wi->skb; + + if (!skb) { /* nop */ + sq->cc++; + continue; + } + + for (i = 0; i < wi->num_dma; i++) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, sq->dma_fifo_cc++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } + + dev_kfree_skb_any(skb); + npkts++; + nbytes += wi->num_bytes; + sq->cc += wi->num_wqebbs; + } + netdev_tx_completed_queue(sq->txq, npkts, nbytes); +} + +#ifdef CONFIG_MLX5_CORE_IPOIB +static inline void +mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, + struct mlx5_wqe_datagram_seg *dseg) +{ + memcpy(&dseg->av, av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(dqkey); +} + +netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_av *av, u32 dqpn, u32 dqkey) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5i_tx_wqe *wqe; + + struct mlx5_wqe_datagram_seg *datagram; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe_info *wi; + + struct mlx5e_sq_stats *stats = sq->stats; + u16 headlen, ihs, pi, contig_wqebbs_room; + u16 ds_cnt, ds_cnt_inl = 0; + u8 num_wqebbs, opcode; + u32 num_bytes; + int num_dma; + __be16 mss; + + /* Calc ihs and ds cnt, no writes to wqe yet */ + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + if (skb_is_gso(skb)) { + opcode = MLX5_OPCODE_LSO; + mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + ihs = mlx5e_tx_get_gso_ihs(sq, skb); + num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; + stats->packets += skb_shinfo(skb)->gso_segs; + } else { + opcode = MLX5_OPCODE_SEND; + mss = 0; + ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + stats->packets++; + } + + stats->bytes += num_bytes; + stats->xmit_more += skb->xmit_more; + + headlen = skb->len - ihs - skb->data_len; + ds_cnt += !!headlen; + ds_cnt += skb_shinfo(skb)->nr_frags; + + if (ihs) { + ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + } + + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) { + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + } + + mlx5i_sq_fetch_wqe(sq, &wqe, pi); + + /* fill wqe */ + wi = &sq->db.wqe_info[pi]; + cseg = &wqe->ctrl; + datagram = &wqe->datagram; + eseg = &wqe->eth; + dseg = wqe->data; + + mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram); + + mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + + eseg->mss = mss; + + if (ihs) { + memcpy(eseg->inline_hdr.start, skb->data, ihs); + eseg->inline_hdr.sz = cpu_to_be16(ihs); + dseg += ds_cnt_inl; + } + + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg); + if (unlikely(num_dma < 0)) + goto err_drop; + + mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes, + num_dma, wi, cseg); + + return NETDEV_TX_OK; + +err_drop: + stats->dropped++; + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c new file mode 100644 index 000000000..85d517360 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/irq.h> +#include "en.h" +#include "en/xdp.h" + +static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) +{ + int current_cpu = smp_processor_id(); + const struct cpumask *aff; + struct irq_data *idata; + + idata = irq_desc_get_irq_data(c->irq_desc); + aff = irq_data_get_affinity_mask(idata); + return cpumask_test_cpu(current_cpu, aff); +} + +static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) +{ + struct mlx5e_sq_stats *stats = sq->stats; + struct net_dim_sample dim_sample; + + if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state))) + return; + + net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes, + &dim_sample); + net_dim(&sq->dim, dim_sample); +} + +static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) +{ + struct mlx5e_rq_stats *stats = rq->stats; + struct net_dim_sample dim_sample; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state))) + return; + + net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes, + &dim_sample); + net_dim(&rq->dim, dim_sample); +} + +int mlx5e_napi_poll(struct napi_struct *napi, int budget) +{ + struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, + napi); + struct mlx5e_ch_stats *ch_stats = c->stats; + bool busy = false; + int work_done = 0; + int i; + + ch_stats->poll++; + + for (i = 0; i < c->num_tc; i++) + busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); + + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); + + if (c->xdp) + busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq); + + if (likely(budget)) { /* budget=0 means: don't poll rx rings */ + work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); + busy |= work_done == budget; + } + + busy |= c->rq.post_wqes(&c->rq); + + if (busy) { + if (likely(mlx5e_channel_no_affinity_change(c))) + return budget; + ch_stats->aff_change++; + if (budget && work_done == budget) + work_done--; + } + + if (unlikely(!napi_complete_done(napi, work_done))) + return work_done; + + ch_stats->arm++; + + for (i = 0; i < c->num_tc; i++) { + mlx5e_handle_tx_dim(&c->sq[i]); + mlx5e_cq_arm(&c->sq[i].cq); + } + + mlx5e_handle_rx_dim(&c->rq); + + mlx5e_cq_arm(&c->rq.cq); + mlx5e_cq_arm(&c->icosq.cq); + mlx5e_cq_arm(&c->xdpsq.cq); + + return work_done; +} + +void mlx5e_completion_event(struct mlx5_core_cq *mcq) +{ + struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); + + napi_schedule(cq->napi); + cq->event_ctr++; + cq->channel->stats->events++; +} + +void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event) +{ + struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); + struct mlx5e_channel *c = cq->channel; + struct net_device *netdev = c->netdev; + + netdev_err(netdev, "%s: cqn=0x%.6x event=0x%.2x\n", + __func__, mcq->cqn, event); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c new file mode 100644 index 000000000..c1e1a16a9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -0,0 +1,991 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif +#include "mlx5_core.h" +#include "fpga/core.h" +#include "eswitch.h" +#include "lib/clock.h" +#include "diag/fw_tracer.h" + +enum { + MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), + MLX5_EQE_OWNER_INIT_VAL = 0x1, +}; + +enum { + MLX5_EQ_STATE_ARMED = 0x9, + MLX5_EQ_STATE_FIRED = 0xa, + MLX5_EQ_STATE_ALWAYS_ARMED = 0xb, +}; + +enum { + MLX5_NUM_SPARE_EQE = 0x80, + MLX5_NUM_ASYNC_EQE = 0x1000, + MLX5_NUM_CMD_EQE = 32, + MLX5_NUM_PF_DRAIN = 64, +}; + +enum { + MLX5_EQ_DOORBEL_OFFSET = 0x40, +}; + +#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ + (1ull << MLX5_EVENT_TYPE_COMM_EST) | \ + (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \ + (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \ + (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) + +struct map_eq_in { + u64 mask; + u32 reserved; + u32 unmap_eqn; +}; + +struct cre_des_eq { + u8 reserved[15]; + u8 eqn; +}; + +static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) +{ + u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {0}; + + MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ); + MLX5_SET(destroy_eq_in, in, eq_number, eqn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) +{ + return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); +} + +static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); + + return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe; +} + +static const char *eqe_type_str(u8 type) +{ + switch (type) { + case MLX5_EVENT_TYPE_COMP: + return "MLX5_EVENT_TYPE_COMP"; + case MLX5_EVENT_TYPE_PATH_MIG: + return "MLX5_EVENT_TYPE_PATH_MIG"; + case MLX5_EVENT_TYPE_COMM_EST: + return "MLX5_EVENT_TYPE_COMM_EST"; + case MLX5_EVENT_TYPE_SQ_DRAINED: + return "MLX5_EVENT_TYPE_SQ_DRAINED"; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; + case MLX5_EVENT_TYPE_CQ_ERROR: + return "MLX5_EVENT_TYPE_CQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_INTERNAL_ERROR: + return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; + case MLX5_EVENT_TYPE_PORT_CHANGE: + return "MLX5_EVENT_TYPE_PORT_CHANGE"; + case MLX5_EVENT_TYPE_GPIO_EVENT: + return "MLX5_EVENT_TYPE_GPIO_EVENT"; + case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: + return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; + case MLX5_EVENT_TYPE_REMOTE_CONFIG: + return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; + case MLX5_EVENT_TYPE_DB_BF_CONGESTION: + return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; + case MLX5_EVENT_TYPE_STALL_EVENT: + return "MLX5_EVENT_TYPE_STALL_EVENT"; + case MLX5_EVENT_TYPE_CMD: + return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_PAGE_REQUEST: + return "MLX5_EVENT_TYPE_PAGE_REQUEST"; + case MLX5_EVENT_TYPE_PAGE_FAULT: + return "MLX5_EVENT_TYPE_PAGE_FAULT"; + case MLX5_EVENT_TYPE_PPS_EVENT: + return "MLX5_EVENT_TYPE_PPS_EVENT"; + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; + case MLX5_EVENT_TYPE_FPGA_ERROR: + return "MLX5_EVENT_TYPE_FPGA_ERROR"; + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + return "MLX5_EVENT_TYPE_GENERAL_EVENT"; + case MLX5_EVENT_TYPE_DEVICE_TRACER: + return "MLX5_EVENT_TYPE_DEVICE_TRACER"; + default: + return "Unrecognized event"; + } +} + +static enum mlx5_dev_event port_subtype_event(u8 subtype) +{ + switch (subtype) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + return MLX5_DEV_EVENT_PORT_DOWN; + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + return MLX5_DEV_EVENT_PORT_UP; + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + return MLX5_DEV_EVENT_PORT_INITIALIZED; + case MLX5_PORT_CHANGE_SUBTYPE_LID: + return MLX5_DEV_EVENT_LID_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + return MLX5_DEV_EVENT_PKEY_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + return MLX5_DEV_EVENT_GUID_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + return MLX5_DEV_EVENT_CLIENT_REREG; + } + return -1; +} + +static void eq_update_ci(struct mlx5_eq *eq, int arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); + u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +static void eqe_pf_action(struct work_struct *work) +{ + struct mlx5_pagefault *pfault = container_of(work, + struct mlx5_pagefault, + work); + struct mlx5_eq *eq = pfault->eq; + + mlx5_core_page_fault(eq->dev, pfault); + mempool_free(pfault, eq->pf_ctx.pool); +} + +static void eq_pf_process(struct mlx5_eq *eq) +{ + struct mlx5_core_dev *dev = eq->dev; + struct mlx5_eqe_page_fault *pf_eqe; + struct mlx5_pagefault *pfault; + struct mlx5_eqe *eqe; + int set_ci = 0; + + while ((eqe = next_eqe_sw(eq))) { + pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC); + if (!pfault) { + schedule_work(&eq->pf_ctx.work); + break; + } + + dma_rmb(); + pf_eqe = &eqe->data.page_fault; + pfault->event_subtype = eqe->sub_type; + pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); + + mlx5_core_dbg(dev, + "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", + eqe->sub_type, pfault->bytes_committed); + + switch (eqe->sub_type) { + case MLX5_PFAULT_SUBTYPE_RDMA: + /* RDMA based event */ + pfault->type = + be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; + pfault->token = + be32_to_cpu(pf_eqe->rdma.pftype_token) & + MLX5_24BIT_MASK; + pfault->rdma.r_key = + be32_to_cpu(pf_eqe->rdma.r_key); + pfault->rdma.packet_size = + be16_to_cpu(pf_eqe->rdma.packet_length); + pfault->rdma.rdma_op_len = + be32_to_cpu(pf_eqe->rdma.rdma_op_len); + pfault->rdma.rdma_va = + be64_to_cpu(pf_eqe->rdma.rdma_va); + mlx5_core_dbg(dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n", + pfault->type, pfault->token, + pfault->rdma.r_key); + mlx5_core_dbg(dev, + "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", + pfault->rdma.rdma_op_len, + pfault->rdma.rdma_va); + break; + + case MLX5_PFAULT_SUBTYPE_WQE: + /* WQE based event */ + pfault->type = + (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; + pfault->token = + be32_to_cpu(pf_eqe->wqe.token); + pfault->wqe.wq_num = + be32_to_cpu(pf_eqe->wqe.pftype_wq) & + MLX5_24BIT_MASK; + pfault->wqe.wqe_index = + be16_to_cpu(pf_eqe->wqe.wqe_index); + pfault->wqe.packet_size = + be16_to_cpu(pf_eqe->wqe.packet_length); + mlx5_core_dbg(dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", + pfault->type, pfault->token, + pfault->wqe.wq_num, + pfault->wqe.wqe_index); + break; + + default: + mlx5_core_warn(dev, + "Unsupported page fault event sub-type: 0x%02hhx\n", + eqe->sub_type); + /* Unsupported page faults should still be + * resolved by the page fault handler + */ + } + + pfault->eq = eq; + INIT_WORK(&pfault->work, eqe_pf_action); + queue_work(eq->pf_ctx.wq, &pfault->work); + + ++eq->cons_index; + ++set_ci; + + if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { + eq_update_ci(eq, 0); + set_ci = 0; + } + } + + eq_update_ci(eq, 1); +} + +static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr) +{ + struct mlx5_eq *eq = eq_ptr; + unsigned long flags; + + if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) { + eq_pf_process(eq); + spin_unlock_irqrestore(&eq->pf_ctx.lock, flags); + } else { + schedule_work(&eq->pf_ctx.work); + } + + return IRQ_HANDLED; +} + +/* mempool_refill() was proposed but unfortunately wasn't accepted + * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html + * Chip workaround. + */ +static void mempool_refill(mempool_t *pool) +{ + while (pool->curr_nr < pool->min_nr) + mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); +} + +static void eq_pf_action(struct work_struct *work) +{ + struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work); + + mempool_refill(eq->pf_ctx.pool); + + spin_lock_irq(&eq->pf_ctx.lock); + eq_pf_process(eq); + spin_unlock_irq(&eq->pf_ctx.lock); +} + +static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name) +{ + spin_lock_init(&pf_ctx->lock); + INIT_WORK(&pf_ctx->work, eq_pf_action); + + pf_ctx->wq = alloc_ordered_workqueue(name, + WQ_MEM_RECLAIM); + if (!pf_ctx->wq) + return -ENOMEM; + + pf_ctx->pool = mempool_create_kmalloc_pool + (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault)); + if (!pf_ctx->pool) + goto err_wq; + + return 0; +err_wq: + destroy_workqueue(pf_ctx->wq); + return -ENOMEM; +} + +int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, + u32 wq_num, u8 type, int error) +{ + u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; + + MLX5_SET(page_fault_resume_in, in, opcode, + MLX5_CMD_OP_PAGE_FAULT_RESUME); + MLX5_SET(page_fault_resume_in, in, error, !!error); + MLX5_SET(page_fault_resume_in, in, page_fault_type, type); + MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); + MLX5_SET(page_fault_resume_in, in, token, token); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); +#endif + +static void general_event_handler(struct mlx5_core_dev *dev, + struct mlx5_eqe *eqe) +{ + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: + if (dev->event) + dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0); + break; + default: + mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", + eqe->sub_type); + } +} + +static void mlx5_temp_warning_event(struct mlx5_core_dev *dev, + struct mlx5_eqe *eqe) +{ + u64 value_lsb; + u64 value_msb; + + value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); + value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); + + mlx5_core_warn(dev, + "High temperature on sensors with bit set %llx %llx", + value_msb, value_lsb); +} + +/* caller must eventually call mlx5_cq_put on the returned cq */ +static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) +{ + struct mlx5_cq_table *table = &eq->cq_table; + struct mlx5_core_cq *cq = NULL; + + spin_lock(&table->lock); + cq = radix_tree_lookup(&table->tree, cqn); + if (likely(cq)) + mlx5_cq_hold(cq); + spin_unlock(&table->lock); + + return cq; +} + +static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn) +{ + struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); + + if (unlikely(!cq)) { + mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); + return; + } + + ++cq->arm_sn; + + cq->comp(cq); + + mlx5_cq_put(cq); +} + +static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type) +{ + struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); + + if (unlikely(!cq)) { + mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); + return; + } + + cq->event(cq, event_type); + + mlx5_cq_put(cq); +} + +static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) +{ + struct mlx5_eq *eq = eq_ptr; + struct mlx5_core_dev *dev = eq->dev; + struct mlx5_eqe *eqe; + int set_ci = 0; + u32 cqn = -1; + u32 rsn; + u8 port; + + while ((eqe = next_eqe_sw(eq))) { + /* + * Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + dma_rmb(); + + mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", + eq->eqn, eqe_type_str(eqe->type)); + switch (eqe->type) { + case MLX5_EVENT_TYPE_COMP: + cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; + mlx5_eq_cq_completion(eq, cqn); + break; + case MLX5_EVENT_TYPE_DCT_DRAINED: + rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; + rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); + mlx5_rsc_event(dev, rsn, eqe->type); + break; + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); + mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n", + eqe_type_str(eqe->type), eqe->type, rsn); + mlx5_rsc_event(dev, rsn, eqe->type); + break; + + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n", + eqe_type_str(eqe->type), eqe->type, rsn); + mlx5_srq_event(dev, rsn, eqe->type); + break; + + case MLX5_EVENT_TYPE_CMD: + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); + break; + + case MLX5_EVENT_TYPE_PORT_CHANGE: + port = (eqe->data.port.port >> 4) & 0xf; + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + case MLX5_PORT_CHANGE_SUBTYPE_LID: + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + if (dev->event) + dev->event(dev, port_subtype_event(eqe->sub_type), + (unsigned long)port); + break; + default: + mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", + port, eqe->sub_type); + } + break; + case MLX5_EVENT_TYPE_CQ_ERROR: + cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; + mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", + cqn, eqe->data.cq_err.syndrome); + mlx5_eq_cq_event(eq, cqn, eqe->type); + break; + + case MLX5_EVENT_TYPE_PAGE_REQUEST: + { + u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id); + s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages); + + mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", + func_id, npages); + mlx5_core_req_pages_handler(dev, func_id, npages); + } + break; + + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); + break; + + case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: + mlx5_port_module_event(dev, eqe); + break; + + case MLX5_EVENT_TYPE_PPS_EVENT: + mlx5_pps_event(dev, eqe); + break; + + case MLX5_EVENT_TYPE_FPGA_ERROR: + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + mlx5_fpga_event(dev, eqe->type, &eqe->data.raw); + break; + + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + mlx5_temp_warning_event(dev, eqe); + break; + + case MLX5_EVENT_TYPE_GENERAL_EVENT: + general_event_handler(dev, eqe); + break; + + case MLX5_EVENT_TYPE_DEVICE_TRACER: + mlx5_fw_tracer_event(dev, eqe); + break; + + default: + mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", + eqe->type, eq->eqn); + break; + } + + ++eq->cons_index; + ++set_ci; + + /* The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MLX5_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ + if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { + eq_update_ci(eq, 0); + set_ci = 0; + } + } + + eq_update_ci(eq, 1); + + if (cqn != -1) + tasklet_schedule(&eq->tasklet_ctx.task); + + return IRQ_HANDLED; +} + +/* Some architectures don't latch interrupts when they are disabled, so using + * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to + * avoid losing them. It is not recommended to use it, unless this is the last + * resort. + */ +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq) +{ + u32 count_eqe; + + disable_irq(eq->irqn); + count_eqe = eq->cons_index; + mlx5_eq_int(eq->irqn, eq); + count_eqe = eq->cons_index - count_eqe; + enable_irq(eq->irqn); + + return count_eqe; +} + +static void init_eq_buf(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe; + int i; + + for (i = 0; i < eq->nent; i++) { + eqe = get_eqe(eq, i); + eqe->owner = MLX5_EQE_OWNER_INIT_VAL; + } +} + +int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, + int nent, u64 mask, const char *name, + enum mlx5_eq_type type) +{ + struct mlx5_cq_table *cq_table = &eq->cq_table; + u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; + struct mlx5_priv *priv = &dev->priv; + irq_handler_t handler; + __be64 *pas; + void *eqc; + int inlen; + u32 *in; + int err; + + /* Init CQ table */ + memset(cq_table, 0, sizeof(*cq_table)); + spin_lock_init(&cq_table->lock); + INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); + + eq->type = type; + eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); + eq->cons_index = 0; + err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); + if (err) + return err; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (type == MLX5_EQ_TYPE_PF) + handler = mlx5_eq_pf_int; + else +#endif + handler = mlx5_eq_int; + + init_eq_buf(eq); + + inlen = MLX5_ST_SZ_BYTES(create_eq_in) + + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_buf; + } + + pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); + mlx5_fill_page_array(&eq->buf, pas); + + MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); + MLX5_SET64(create_eq_in, in, event_bitmask, mask); + + eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); + MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); + MLX5_SET(eqc, eqc, uar_page, priv->uar->index); + MLX5_SET(eqc, eqc, intr, vecidx); + MLX5_SET(eqc, eqc, log_page_size, + eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (err) + goto err_in; + + snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", + name, pci_name(dev->pdev)); + + eq->eqn = MLX5_GET(create_eq_out, out, eq_number); + eq->irqn = pci_irq_vector(dev->pdev, vecidx); + eq->dev = dev; + eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; + err = request_irq(eq->irqn, handler, 0, + priv->irq_info[vecidx].name, eq); + if (err) + goto err_eq; + + err = mlx5_debug_eq_add(dev, eq); + if (err) + goto err_irq; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (type == MLX5_EQ_TYPE_PF) { + err = init_pf_ctx(&eq->pf_ctx, name); + if (err) + goto err_irq; + } else +#endif + { + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, + (unsigned long)&eq->tasklet_ctx); + } + + /* EQs are created in ARMED state + */ + eq_update_ci(eq, 1); + + kvfree(in); + return 0; + +err_irq: + free_irq(eq->irqn, eq); + +err_eq: + mlx5_cmd_destroy_eq(dev, eq->eqn); + +err_in: + kvfree(in); + +err_buf: + mlx5_buf_free(dev, &eq->buf); + return err; +} + +int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + mlx5_debug_eq_remove(dev, eq); + free_irq(eq->irqn, eq); + err = mlx5_cmd_destroy_eq(dev, eq->eqn); + if (err) + mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", + eq->eqn); + synchronize_irq(eq->irqn); + + if (eq->type == MLX5_EQ_TYPE_COMP) { + tasklet_disable(&eq->tasklet_ctx.task); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + } else if (eq->type == MLX5_EQ_TYPE_PF) { + cancel_work_sync(&eq->pf_ctx.work); + destroy_workqueue(eq->pf_ctx.wq); + mempool_destroy(eq->pf_ctx.pool); +#endif + } + mlx5_buf_free(dev, &eq->buf); + + return err; +} + +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) +{ + struct mlx5_cq_table *table = &eq->cq_table; + int err; + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, cq->cqn, cq); + spin_unlock_irq(&table->lock); + + return err; +} + +int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) +{ + struct mlx5_cq_table *table = &eq->cq_table; + struct mlx5_core_cq *tmp; + + spin_lock_irq(&table->lock); + tmp = radix_tree_delete(&table->tree, cq->cqn); + spin_unlock_irq(&table->lock); + + if (!tmp) { + mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn); + return -ENOENT; + } + + if (tmp != cq) { + mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn); + return -EINVAL; + } + + return 0; +} + +int mlx5_eq_init(struct mlx5_core_dev *dev) +{ + int err; + + spin_lock_init(&dev->priv.eq_table.lock); + + err = mlx5_eq_debugfs_init(dev); + + return err; +} + +void mlx5_eq_cleanup(struct mlx5_core_dev *dev) +{ + mlx5_eq_debugfs_cleanup(dev); +} + +int mlx5_start_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; + int err; + + if (MLX5_VPORT_MANAGER(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); + + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && + MLX5_CAP_GEN(dev, general_notification_event)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT); + + if (MLX5_CAP_GEN(dev, port_module_event)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT); + else + mlx5_core_dbg(dev, "port_module_event is not set\n"); + + if (MLX5_PPS_CAP(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT); + + if (MLX5_CAP_GEN(dev, fpga)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) | + (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR); + if (MLX5_CAP_GEN_MAX(dev, dct)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED); + + if (MLX5_CAP_GEN(dev, temp_warn_event)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT); + + if (MLX5_CAP_MCAM_REG(dev, tracer_registers)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER); + + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, + MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, + "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC); + if (err) { + mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); + return err; + } + + mlx5_cmd_use_events(dev); + + err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC, + MLX5_NUM_ASYNC_EQE, async_event_mask, + "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC); + if (err) { + mlx5_core_warn(dev, "failed to create async EQ %d\n", err); + goto err1; + } + + err = mlx5_create_map_eq(dev, &table->pages_eq, + MLX5_EQ_VEC_PAGES, + /* TODO: sriov max_vf + */ 1, + 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", + MLX5_EQ_TYPE_ASYNC); + if (err) { + mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); + goto err2; + } + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (MLX5_CAP_GEN(dev, pg)) { + err = mlx5_create_map_eq(dev, &table->pfault_eq, + MLX5_EQ_VEC_PFAULT, + MLX5_NUM_ASYNC_EQE, + 1 << MLX5_EVENT_TYPE_PAGE_FAULT, + "mlx5_page_fault_eq", + MLX5_EQ_TYPE_PF); + if (err) { + mlx5_core_warn(dev, "failed to create page fault EQ %d\n", + err); + goto err3; + } + } + + return err; +err3: + mlx5_destroy_unmap_eq(dev, &table->pages_eq); +#else + return err; +#endif + +err2: + mlx5_destroy_unmap_eq(dev, &table->async_eq); + +err1: + mlx5_cmd_use_polling(dev); + mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + return err; +} + +void mlx5_stop_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + int err; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (MLX5_CAP_GEN(dev, pg)) { + err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); + if (err) + mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n", + err); + } +#endif + + err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); + if (err) + mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", + err); + + err = mlx5_destroy_unmap_eq(dev, &table->async_eq); + if (err) + mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", + err); + mlx5_cmd_use_polling(dev); + + err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + if (err) + mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", + err); +} + +int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0}; + + MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); + MLX5_SET(query_eq_in, in, eq_number, eq->eqn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq; + +#ifdef CONFIG_RFS_ACCEL + if (dev->rmap) { + free_irq_cpu_rmap(dev->rmap); + dev->rmap = NULL; + } +#endif + list_for_each_entry(eq, &table->comp_eqs_list, list) + free_irq(eq->irqn, eq); + + free_irq(table->pages_eq.irqn, &table->pages_eq); + free_irq(table->async_eq.irqn, &table->async_eq); + free_irq(table->cmd_eq.irqn, &table->cmd_eq); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (MLX5_CAP_GEN(dev, pg)) + free_irq(table->pfault_eq.irqn, &table->pfault_eq); +#endif + pci_free_irq_vectors(dev->pdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c new file mode 100644 index 000000000..2190daace --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -0,0 +1,2219 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "mlx5_core.h" +#include "eswitch.h" +#include "fs_core.h" + +#define UPLINK_VPORT 0xFFFF + +enum { + MLX5_ACTION_NONE = 0, + MLX5_ACTION_ADD = 1, + MLX5_ACTION_DEL = 2, +}; + +/* Vport UC/MC hash node */ +struct vport_addr { + struct l2addr_node node; + u8 action; + u32 vport; + struct mlx5_flow_handle *flow_rule; + bool mpfs; /* UC MAC was added to MPFs */ + /* A flag indicating that mac was added due to mc promiscuous vport */ + bool mc_promisc; +}; + +enum { + UC_ADDR_CHANGE = BIT(0), + MC_ADDR_CHANGE = BIT(1), + PROMISC_CHANGE = BIT(3), +}; + +/* Vport context events */ +#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ + MC_ADDR_CHANGE | \ + PROMISC_CHANGE) + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, + u32 events_mask) +{ + int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0}; + int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0}; + void *nic_vport_ctx; + + MLX5_SET(modify_nic_vport_context_in, in, + opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); + + if (events_mask & UC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_uc_address_change, 1); + if (events_mask & MC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_mc_address_change, 1); + if (events_mask & PROMISC_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_promisc_change, 1); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +/* E-Switch vport context HW commands */ +static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0}; + + MLX5_SET(modify_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} + +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 vlan, u8 qos, u8 set_flags) +{ + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -EOPNOTSUPP; + + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n", + vport, vlan, qos, set_flags); + + if (set_flags & SET_VLAN_STRIP) + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_strip, 1); + + if (set_flags & SET_VLAN_INSERT) { + /* insert only if no vlan in packet */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, 1); + + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_pcp, qos); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_id, vlan); + } + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_strip, 1); + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_insert, 1); + + return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in)); +} + +/* E-Switch FDB */ +static struct mlx5_flow_handle * +__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, + u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) +{ + int match_header = (is_zero_ether_addr(mac_c) ? 0 : + MLX5_MATCH_OUTER_HEADERS); + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_spec *spec; + void *mv_misc = NULL; + void *mc_misc = NULL; + u8 *dmac_v = NULL; + u8 *dmac_c = NULL; + + if (rx_rule) + match_header |= MLX5_MATCH_MISC_PARAMETERS; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return NULL; + + dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dmac_47_16); + dmac_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dmac_47_16); + + if (match_header & MLX5_MATCH_OUTER_HEADERS) { + ether_addr_copy(dmac_v, mac_v); + ether_addr_copy(dmac_c, mac_c); + } + + if (match_header & MLX5_MATCH_MISC_PARAMETERS) { + mv_misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT); + MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = vport; + + esw_debug(esw->dev, + "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", + dmac_v, dmac_c, vport); + spec->match_criteria_enable = match_header; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = + mlx5_add_flow_rules(esw->fdb_table.legacy.fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", + dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); + flow_rule = NULL; + } + + kvfree(spec); + return flow_rule; +} + +static struct mlx5_flow_handle * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +{ + u8 mac_c[ETH_ALEN]; + + eth_broadcast_addr(mac_c); + return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac); +} + +static struct mlx5_flow_handle * +esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) +{ + u8 mac_c[ETH_ALEN]; + u8 mac_v[ETH_ALEN]; + + eth_zero_addr(mac_c); + eth_zero_addr(mac_v); + mac_c[0] = 0x01; + mac_v[0] = 0x01; + return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v); +} + +static struct mlx5_flow_handle * +esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) +{ + u8 mac_c[ETH_ALEN]; + u8 mac_v[ETH_ALEN]; + + eth_zero_addr(mac_c); + eth_zero_addr(mac_v); + return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); +} + +static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *g; + void *match_criteria; + int table_size; + u32 *flow_group_in; + u8 *dmac; + int err = 0; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + ft_attr.max_fte = table_size; + fdb = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create FDB Table err %d\n", err); + goto out; + } + esw->fdb_table.legacy.fdb = fdb; + + /* Addresses group : Full match unicast/multicast addresses */ + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + /* Preserve 2 entries for allmulti and promisc rules*/ + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3); + eth_broadcast_addr(dmac); + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.addr_grp = g; + + /* Allmulti group : One rule that forwards any mcast traffic */ + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2); + eth_zero_addr(dmac); + dmac[0] = 0x01; + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.allmulti_grp = g; + + /* Promiscuous group : + * One rule that forward all unmatched traffic from previous groups + */ + eth_zero_addr(dmac); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.promisc_grp = g; + +out: + if (err) { + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + esw->fdb_table.legacy.allmulti_grp = NULL; + } + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + esw->fdb_table.legacy.addr_grp = NULL; + } + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.fdb)) { + mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb); + esw->fdb_table.legacy.fdb = NULL; + } + } + + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.legacy.fdb) + return; + + esw_debug(esw->dev, "Destroy FDB Table\n"); + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb); + esw->fdb_table.legacy.fdb = NULL; + esw->fdb_table.legacy.addr_grp = NULL; + esw->fdb_table.legacy.allmulti_grp = NULL; + esw->fdb_table.legacy.promisc_grp = NULL; +} + +/* E-Switch vport UC/MC lists management */ +typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, + struct vport_addr *vaddr); + +static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + int err; + + /* Skip mlx5_mpfs_add_mac for PFs, + * it is already done by the PF netdev in mlx5e_execute_l2_action + */ + if (!vport) + goto fdb_add; + + err = mlx5_mpfs_add_mac(esw->dev, mac); + if (err) { + esw_warn(esw->dev, + "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n", + mac, vport, err); + return err; + } + vaddr->mpfs = true; + +fdb_add: + /* SRIOV is enabled: Forward UC MAC to vport */ + if (esw->fdb_table.legacy.fdb && esw->mode == SRIOV_LEGACY) + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", + vport, mac, vaddr->flow_rule); + + return 0; +} + +static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + int err = 0; + + /* Skip mlx5_mpfs_del_mac for PFs, + * it is already done by the PF netdev in mlx5e_execute_l2_action + */ + if (!vport || !vaddr->mpfs) + goto fdb_del; + + err = mlx5_mpfs_del_mac(esw->dev, mac); + if (err) + esw_warn(esw->dev, + "Failed to del L2 table mac(%pM) for vport(%d), err(%d)\n", + mac, vport, err); + vaddr->mpfs = false; + +fdb_del: + if (vaddr->flow_rule) + mlx5_del_flow_rules(vaddr->flow_rule); + vaddr->flow_rule = NULL; + + return 0; +} + +static void update_allmulti_vports(struct mlx5_eswitch *esw, + struct vport_addr *vaddr, + struct esw_mc_addr *esw_mc) +{ + u8 *mac = vaddr->node.addr; + u32 vport_idx = 0; + + for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) { + struct mlx5_vport *vport = &esw->vports[vport_idx]; + struct hlist_head *vport_hash = vport->mc_list; + struct vport_addr *iter_vaddr = + l2addr_hash_find(vport_hash, + mac, + struct vport_addr); + if (IS_ERR_OR_NULL(vport->allmulti_rule) || + vaddr->vport == vport_idx) + continue; + switch (vaddr->action) { + case MLX5_ACTION_ADD: + if (iter_vaddr) + continue; + iter_vaddr = l2addr_hash_add(vport_hash, mac, + struct vport_addr, + GFP_KERNEL); + if (!iter_vaddr) { + esw_warn(esw->dev, + "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n", + mac, vport_idx); + continue; + } + iter_vaddr->vport = vport_idx; + iter_vaddr->flow_rule = + esw_fdb_set_vport_rule(esw, + mac, + vport_idx); + iter_vaddr->mc_promisc = true; + break; + case MLX5_ACTION_DEL: + if (!iter_vaddr) + continue; + mlx5_del_flow_rules(iter_vaddr->flow_rule); + l2addr_hash_del(iter_vaddr); + break; + } + } +} + +static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.legacy.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (esw_mc) + goto add; + + esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL); + if (!esw_mc) + return -ENOMEM; + + esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ + esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); + + /* Add this multicast mac to all the mc promiscuous vports */ + update_allmulti_vports(esw, vaddr, esw_mc); + +add: + /* If the multicast mac is added as a result of mc promiscuous vport, + * don't increment the multicast ref count + */ + if (!vaddr->mc_promisc) + esw_mc->refcnt++; + + /* Forward MC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + esw_debug(esw->dev, + "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, + esw_mc->refcnt, esw_mc->uplink_rule); + return 0; +} + +static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.legacy.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to find eswitch MC addr for MAC(%pM) vport(%d)", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, + "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, esw_mc->refcnt, + esw_mc->uplink_rule); + + if (vaddr->flow_rule) + mlx5_del_flow_rules(vaddr->flow_rule); + vaddr->flow_rule = NULL; + + /* If the multicast mac is added as a result of mc promiscuous vport, + * don't decrement the multicast ref count. + */ + if (vaddr->mc_promisc || (--esw_mc->refcnt > 0)) + return 0; + + /* Remove this multicast mac from all the mc promiscuous vports */ + update_allmulti_vports(esw, vaddr, esw_mc); + + if (esw_mc->uplink_rule) + mlx5_del_flow_rules(esw_mc->uplink_rule); + + l2addr_hash_del(esw_mc); + return 0; +} + +/* Apply vport UC/MC list to HW l2 table and FDB table */ +static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + vport_addr_action vport_addr_add; + vport_addr_action vport_addr_del; + struct vport_addr *addr; + struct l2addr_node *node; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + vport_addr_add = is_uc ? esw_add_uc_addr : + esw_add_mc_addr; + vport_addr_del = is_uc ? esw_del_uc_addr : + esw_del_mc_addr; + + hash = is_uc ? vport->uc_list : vport->mc_list; + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + switch (addr->action) { + case MLX5_ACTION_ADD: + vport_addr_add(esw, addr); + addr->action = MLX5_ACTION_NONE; + break; + case MLX5_ACTION_DEL: + vport_addr_del(esw, addr); + l2addr_hash_del(addr); + break; + } + } +} + +/* Sync vport UC/MC list from vport context */ +static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + u8 (*mac_list)[ETH_ALEN]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int size; + int err; + int hi; + int i; + + size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) : + MLX5_MAX_MC_PER_VPORT(esw->dev); + + mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!mac_list) + return; + + hash = is_uc ? vport->uc_list : vport->mc_list; + + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + + if (!vport->enabled) + goto out; + + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type, + mac_list, &size); + if (err) + goto out; + esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", + vport_num, is_uc ? "UC" : "MC", size); + + for (i = 0; i < size; i++) { + if (is_uc && !is_valid_ether_addr(mac_list[i])) + continue; + + if (!is_uc && !is_multicast_ether_addr(mac_list[i])) + continue; + + addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); + if (addr) { + addr->action = MLX5_ACTION_NONE; + /* If this mac was previously added because of allmulti + * promiscuous rx mode, its now converted to be original + * vport mac. + */ + if (addr->mc_promisc) { + struct esw_mc_addr *esw_mc = + l2addr_hash_find(esw->mc_table, + mac_list[i], + struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to MAC(%pM) in mcast DB\n", + mac_list[i]); + continue; + } + esw_mc->refcnt++; + addr->mc_promisc = false; + } + continue; + } + + addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add MAC(%pM) to vport[%d] DB\n", + mac_list[i], vport_num); + continue; + } + addr->vport = vport_num; + addr->action = MLX5_ACTION_ADD; + } +out: + kfree(mac_list); +} + +/* Sync vport UC/MC list from vport context + * Must be called after esw_update_vport_addr_list + */ +static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + hash = vport->mc_list; + + for_each_l2hash_node(node, tmp, esw->mc_table, hi) { + u8 *mac = node->addr; + + addr = l2addr_hash_find(hash, mac, struct vport_addr); + if (addr) { + if (addr->action == MLX5_ACTION_DEL) + addr->action = MLX5_ACTION_NONE; + continue; + } + addr = l2addr_hash_add(hash, mac, struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add allmulti MAC(%pM) to vport[%d] DB\n", + mac, vport_num); + continue; + } + addr->vport = vport_num; + addr->action = MLX5_ACTION_ADD; + addr->mc_promisc = true; + } +} + +/* Apply vport rx mode to HW FDB table */ +static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, + bool promisc, bool mc_promisc) +{ + struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; + struct mlx5_vport *vport = &esw->vports[vport_num]; + + if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) + goto promisc; + + if (mc_promisc) { + vport->allmulti_rule = + esw_fdb_set_vport_allmulti_rule(esw, vport_num); + if (!allmulti_addr->uplink_rule) + allmulti_addr->uplink_rule = + esw_fdb_set_vport_allmulti_rule(esw, + UPLINK_VPORT); + allmulti_addr->refcnt++; + } else if (vport->allmulti_rule) { + mlx5_del_flow_rules(vport->allmulti_rule); + vport->allmulti_rule = NULL; + + if (--allmulti_addr->refcnt > 0) + goto promisc; + + if (allmulti_addr->uplink_rule) + mlx5_del_flow_rules(allmulti_addr->uplink_rule); + allmulti_addr->uplink_rule = NULL; + } + +promisc: + if (IS_ERR_OR_NULL(vport->promisc_rule) != promisc) + return; + + if (promisc) { + vport->promisc_rule = esw_fdb_set_vport_promisc_rule(esw, + vport_num); + } else if (vport->promisc_rule) { + mlx5_del_flow_rules(vport->promisc_rule); + vport->promisc_rule = NULL; + } +} + +/* Sync vport rx mode from vport context */ +static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + int promisc_all = 0; + int promisc_uc = 0; + int promisc_mc = 0; + int err; + + err = mlx5_query_nic_vport_promisc(esw->dev, + vport_num, + &promisc_uc, + &promisc_mc, + &promisc_all); + if (err) + return; + esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n", + vport_num, promisc_all, promisc_mc); + + if (!vport->info.trusted || !vport->enabled) { + promisc_uc = 0; + promisc_mc = 0; + promisc_all = 0; + } + + esw_apply_vport_rx_mode(esw, vport_num, promisc_all, + (promisc_all || promisc_mc)); +} + +static void esw_vport_change_handle_locked(struct mlx5_vport *vport) +{ + struct mlx5_core_dev *dev = vport->dev; + struct mlx5_eswitch *esw = dev->priv.eswitch; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", + vport->vport, mac); + + if (vport->enabled_events & UC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + } + + if (vport->enabled_events & MC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + } + + if (vport->enabled_events & PROMISC_CHANGE) { + esw_update_vport_rx_mode(esw, vport->vport); + if (!IS_ERR_OR_NULL(vport->allmulti_rule)) + esw_update_vport_mc_promisc(esw, vport->vport); + } + + if (vport->enabled_events & (PROMISC_CHANGE | MC_ADDR_CHANGE)) { + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + } + + esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); + if (vport->enabled) + arm_vport_context_events_cmd(dev, vport->vport, + vport->enabled_events); +} + +static void esw_vport_change_handler(struct work_struct *work) +{ + struct mlx5_vport *vport = + container_of(work, struct mlx5_vport, vport_change_handler); + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + mutex_lock(&esw->state_lock); + esw_vport_change_handle_locked(vport); + mutex_unlock(&esw->state_lock); +} + +static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *vlan_grp = NULL; + struct mlx5_flow_group *drop_grp = NULL; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + void *match_criteria; + u32 *flow_group_in; + /* The egress acl table contains 2 rules: + * 1)Allow traffic with vlan_tag=vst_vlan_id + * 2)Drop all other traffic. + */ + int table_size = 2; + int err = 0; + + if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) + return -EOPNOTSUPP; + + if (!IS_ERR_OR_NULL(vport->egress.acl)) + return 0; + + esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n", + vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); + + root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS, + vport->vport); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n", + vport->vport, err); + goto out; + } + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + vlan_grp = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(vlan_grp)) { + err = PTR_ERR(vlan_grp); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n", + vport->vport, err); + goto out; + } + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + drop_grp = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(drop_grp)) { + err = PTR_ERR(drop_grp); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n", + vport->vport, err); + goto out; + } + + vport->egress.acl = acl; + vport->egress.drop_grp = drop_grp; + vport->egress.allowed_vlans_grp = vlan_grp; +out: + kvfree(flow_group_in); + if (err && !IS_ERR_OR_NULL(vlan_grp)) + mlx5_destroy_flow_group(vlan_grp); + if (err && !IS_ERR_OR_NULL(acl)) + mlx5_destroy_flow_table(acl); + return err; +} + +static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) + mlx5_del_flow_rules(vport->egress.allowed_vlan); + + if (!IS_ERR_OR_NULL(vport->egress.drop_rule)) + mlx5_del_flow_rules(vport->egress.drop_rule); + + vport->egress.allowed_vlan = NULL; + vport->egress.drop_rule = NULL; +} + +static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->egress.acl)) + return; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport); + + esw_vport_cleanup_egress_rules(esw, vport); + mlx5_destroy_flow_group(vport->egress.allowed_vlans_grp); + mlx5_destroy_flow_group(vport->egress.drop_grp); + mlx5_destroy_flow_table(vport->egress.acl); + vport->egress.allowed_vlans_grp = NULL; + vport->egress.drop_grp = NULL; + vport->egress.acl = NULL; +} + +static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + /* The ingress acl table contains 4 groups + * (2 active rules at the same time - + * 1 allow rule from one of the first 3 groups. + * 1 drop rule from the last group): + * 1)Allow untagged traffic with smac=original mac. + * 2)Allow untagged traffic. + * 3)Allow traffic with smac=original mac. + * 4)Drop all other traffic. + */ + int table_size = 4; + int err = 0; + + if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) + return -EOPNOTSUPP; + + if (!IS_ERR_OR_NULL(vport->ingress.acl)) + return 0; + + esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n", + vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); + + root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + vport->vport); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.acl = acl; + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged spoofchk flow group, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.allow_untagged_spoofchk_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged flow group, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.allow_untagged_only_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress spoofchk flow group, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.allow_spoofchk_only_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress drop flow group, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.drop_grp = g; + +out: + if (err) { + if (!IS_ERR_OR_NULL(vport->ingress.allow_spoofchk_only_grp)) + mlx5_destroy_flow_group( + vport->ingress.allow_spoofchk_only_grp); + if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_only_grp)) + mlx5_destroy_flow_group( + vport->ingress.allow_untagged_only_grp); + if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_spoofchk_grp)) + mlx5_destroy_flow_group( + vport->ingress.allow_untagged_spoofchk_grp); + if (!IS_ERR_OR_NULL(vport->ingress.acl)) + mlx5_destroy_flow_table(vport->ingress.acl); + } + + kvfree(flow_group_in); + return err; +} + +static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->ingress.drop_rule)) + mlx5_del_flow_rules(vport->ingress.drop_rule); + + if (!IS_ERR_OR_NULL(vport->ingress.allow_rule)) + mlx5_del_flow_rules(vport->ingress.allow_rule); + + vport->ingress.drop_rule = NULL; + vport->ingress.allow_rule = NULL; +} + +static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->ingress.acl)) + return; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport); + + esw_vport_cleanup_ingress_rules(esw, vport); + mlx5_destroy_flow_group(vport->ingress.allow_spoofchk_only_grp); + mlx5_destroy_flow_group(vport->ingress.allow_untagged_only_grp); + mlx5_destroy_flow_group(vport->ingress.allow_untagged_spoofchk_grp); + mlx5_destroy_flow_group(vport->ingress.drop_grp); + mlx5_destroy_flow_table(vport->ingress.acl); + vport->ingress.acl = NULL; + vport->ingress.drop_grp = NULL; + vport->ingress.allow_spoofchk_only_grp = NULL; + vport->ingress.allow_untagged_only_grp = NULL; + vport->ingress.allow_untagged_spoofchk_grp = NULL; +} + +static int esw_vport_ingress_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_fc *counter = vport->ingress.drop_counter; + struct mlx5_flow_destination drop_ctr_dst = {0}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_spec *spec; + int dest_num = 0; + int err = 0; + u8 *smac_v; + + esw_vport_cleanup_ingress_rules(esw, vport); + + if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { + esw_vport_disable_ingress_acl(esw, vport); + return 0; + } + + err = esw_vport_enable_ingress_acl(esw, vport); + if (err) { + mlx5_core_warn(esw->dev, + "failed to enable ingress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } + + esw_debug(esw->dev, + "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->info.vlan, vport->info.qos); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + + if (vport->info.vlan || vport->info.qos) + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + + if (vport->info.spoofchk) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); + smac_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + outer_headers.smac_47_16); + ether_addr_copy(smac_v, vport->info.mac); + } + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + vport->ingress.allow_rule = + mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.allow_rule)) { + err = PTR_ERR(vport->ingress.allow_rule); + esw_warn(esw->dev, + "vport[%d] configure ingress allow rule, err(%d)\n", + vport->vport, err); + vport->ingress.allow_rule = NULL; + goto out; + } + + memset(spec, 0, sizeof(*spec)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + + /* Attach drop flow counter */ + if (counter) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + drop_ctr_dst.counter = counter; + dst = &drop_ctr_dst; + dest_num++; + } + vport->ingress.drop_rule = + mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, dst, dest_num); + if (IS_ERR(vport->ingress.drop_rule)) { + err = PTR_ERR(vport->ingress.drop_rule); + esw_warn(esw->dev, + "vport[%d] configure ingress drop rule, err(%d)\n", + vport->vport, err); + vport->ingress.drop_rule = NULL; + goto out; + } + +out: + if (err) + esw_vport_cleanup_ingress_rules(esw, vport); + kvfree(spec); + return err; +} + +static int esw_vport_egress_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_fc *counter = vport->egress.drop_counter; + struct mlx5_flow_destination drop_ctr_dst = {0}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_spec *spec; + int dest_num = 0; + int err = 0; + + esw_vport_cleanup_egress_rules(esw, vport); + + if (!vport->info.vlan && !vport->info.qos) { + esw_vport_disable_egress_acl(esw, vport); + return 0; + } + + err = esw_vport_enable_egress_acl(esw, vport); + if (err) { + mlx5_core_warn(esw->dev, + "failed to enable egress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } + + esw_debug(esw->dev, + "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->info.vlan, vport->info.qos); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + + /* Allowed vlan rule */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + vport->egress.allowed_vlan = + mlx5_add_flow_rules(vport->egress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->egress.allowed_vlan)) { + err = PTR_ERR(vport->egress.allowed_vlan); + esw_warn(esw->dev, + "vport[%d] configure egress allowed vlan rule failed, err(%d)\n", + vport->vport, err); + vport->egress.allowed_vlan = NULL; + goto out; + } + + /* Drop others rule (star rule) */ + memset(spec, 0, sizeof(*spec)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + + /* Attach egress drop flow counter */ + if (counter) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + drop_ctr_dst.counter = counter; + dst = &drop_ctr_dst; + dest_num++; + } + vport->egress.drop_rule = + mlx5_add_flow_rules(vport->egress.acl, spec, + &flow_act, dst, dest_num); + if (IS_ERR(vport->egress.drop_rule)) { + err = PTR_ERR(vport->egress.drop_rule); + esw_warn(esw->dev, + "vport[%d] configure egress drop rule failed, err(%d)\n", + vport->vport, err); + vport->egress.drop_rule = NULL; + } +out: + kvfree(spec); + return err; +} + +/* Vport QoS management */ +static int esw_create_tsar(struct mlx5_eswitch *esw) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + struct mlx5_core_dev *dev = esw->dev; + int err; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return 0; + + if (esw->qos.enabled) + return -EEXIST; + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + tsar_ctx, + &esw->qos.root_tsar_id); + if (err) { + esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err); + return err; + } + + esw->qos.enabled = true; + return 0; +} + +static void esw_destroy_tsar(struct mlx5_eswitch *esw) +{ + int err; + + if (!esw->qos.enabled) + return; + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_id); + if (err) + esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err); + + esw->qos.enabled = false; +} + +static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, + u32 initial_max_rate, u32 initial_bw_share) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_core_dev *dev = esw->dev; + void *vport_elem; + int err = 0; + + if (!esw->qos.enabled || !MLX5_CAP_GEN(dev, qos) || + !MLX5_CAP_QOS(dev, esw_scheduling)) + return 0; + + if (vport->qos.enabled) + return -EEXIST; + + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); + vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, + element_attributes); + MLX5_SET(vport_element, vport_elem, vport_number, vport_num); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, + esw->qos.root_tsar_id); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, + initial_max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, initial_bw_share); + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + &vport->qos.esw_tsar_ix); + if (err) { + esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", + vport_num, err); + return err; + } + + vport->qos.enabled = true; + return 0; +} + +static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + int err = 0; + + if (!vport->qos.enabled) + return; + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + vport->qos.esw_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", + vport_num, err); + + vport->qos.enabled = false; +} + +static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, + u32 max_rate, u32 bw_share) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_core_dev *dev = esw->dev; + void *vport_elem; + u32 bitmask = 0; + int err = 0; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return -EOPNOTSUPP; + + if (!vport->qos.enabled) + return -EIO; + + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); + vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, + element_attributes); + MLX5_SET(vport_element, vport_elem, vport_number, vport_num); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, + esw->qos.root_tsar_id); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, + max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + + err = mlx5_modify_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + vport->qos.esw_tsar_ix, + bitmask); + if (err) { + esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", + vport_num, err); + return err; + } + + return 0; +} + +static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 *)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + +static void esw_apply_vport_conf(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int vport_num = vport->vport; + + if (!vport_num) + return; + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport_num, + vport->info.link_state); + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid); + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, + (vport->info.vlan || vport->info.qos)); + + /* Only legacy mode needs ACLs */ + if (esw->mode == SRIOV_LEGACY) { + esw_vport_ingress_config(esw, vport); + esw_vport_egress_config(esw, vport); + } +} + +static void esw_vport_create_drop_counters(struct mlx5_vport *vport) +{ + struct mlx5_core_dev *dev = vport->dev; + + if (MLX5_CAP_ESW_INGRESS_ACL(dev, flow_counter)) { + vport->ingress.drop_counter = mlx5_fc_create(dev, false); + if (IS_ERR(vport->ingress.drop_counter)) { + esw_warn(dev, + "vport[%d] configure ingress drop rule counter failed\n", + vport->vport); + vport->ingress.drop_counter = NULL; + } + } + + if (MLX5_CAP_ESW_EGRESS_ACL(dev, flow_counter)) { + vport->egress.drop_counter = mlx5_fc_create(dev, false); + if (IS_ERR(vport->egress.drop_counter)) { + esw_warn(dev, + "vport[%d] configure egress drop rule counter failed\n", + vport->vport); + vport->egress.drop_counter = NULL; + } + } +} + +static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport) +{ + struct mlx5_core_dev *dev = vport->dev; + + if (vport->ingress.drop_counter) + mlx5_fc_destroy(dev, vport->ingress.drop_counter); + if (vport->egress.drop_counter) + mlx5_fc_destroy(dev, vport->egress.drop_counter); +} + +static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, + int enable_events) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + + mutex_lock(&esw->state_lock); + WARN_ON(vport->enabled); + + esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + + /* Create steering drop counters for ingress and egress ACLs */ + if (vport_num && esw->mode == SRIOV_LEGACY) + esw_vport_create_drop_counters(vport); + + /* Restore old vport configuration */ + esw_apply_vport_conf(esw, vport); + + /* Attach vport to the eswitch rate limiter */ + if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate, + vport->qos.bw_share)) + esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num); + + /* Sync with current vport context */ + vport->enabled_events = enable_events; + vport->enabled = true; + + /* only PF is trusted by default */ + if (!vport_num) + vport->info.trusted = true; + + esw_vport_change_handle_locked(vport); + + esw->enabled_vports++; + esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); + mutex_unlock(&esw->state_lock); +} + +static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + + if (!vport->enabled) + return; + + esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); + /* Mark this vport as disabled to discard new events */ + vport->enabled = false; + + synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC)); + /* Wait for current already scheduled events to complete */ + flush_workqueue(esw->work_queue); + /* Disable events from this vport */ + arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + mutex_lock(&esw->state_lock); + /* We don't assume VFs will cleanup after themselves. + * Calling vport change handler while vport is disabled will cleanup + * the vport resources. + */ + esw_vport_change_handle_locked(vport); + vport->enabled_events = 0; + esw_vport_disable_qos(esw, vport_num); + if (vport_num && esw->mode == SRIOV_LEGACY) { + mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport_num, + MLX5_VPORT_ADMIN_STATE_DOWN); + esw_vport_disable_egress_acl(esw, vport); + esw_vport_disable_ingress_acl(esw, vport); + esw_vport_destroy_drop_counters(vport); + } + esw->enabled_vports--; + mutex_unlock(&esw->state_lock); +} + +/* Public E-Switch API */ +#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) + + +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) +{ + int err; + int i, enabled_events; + + if (!ESW_ALLOWED(esw) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + return -EOPNOTSUPP; + } + + if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support)) + esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n"); + + if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) + esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); + + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); + esw->mode = mode; + + if (mode == SRIOV_LEGACY) { + err = esw_create_legacy_fdb_table(esw); + } else { + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + + err = esw_offloads_init(esw, nvfs + 1); + } + + if (err) + goto abort; + + err = esw_create_tsar(esw); + if (err) + esw_warn(esw->dev, "Failed to create eswitch TSAR"); + + /* Don't enable vport events when in SRIOV_OFFLOADS mode, since: + * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode + * 2. FDB/Eswitch is programmed by user space tools + */ + enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0; + for (i = 0; i <= nvfs; i++) + esw_enable_vport(esw, i, enabled_events); + + esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", + esw->enabled_vports); + return 0; + +abort: + esw->mode = SRIOV_NONE; + + if (mode == SRIOV_OFFLOADS) + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + + return err; +} + +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +{ + struct esw_mc_addr *mc_promisc; + int old_mode; + int nvports; + int i; + + if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) + return; + + esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", + esw->enabled_vports, esw->mode); + + mc_promisc = &esw->mc_promisc; + nvports = esw->enabled_vports; + + for (i = 0; i < esw->total_vports; i++) + esw_disable_vport(esw, i); + + if (mc_promisc && mc_promisc->uplink_rule) + mlx5_del_flow_rules(mc_promisc->uplink_rule); + + esw_destroy_tsar(esw); + + if (esw->mode == SRIOV_LEGACY) + esw_destroy_legacy_fdb_table(esw); + else if (esw->mode == SRIOV_OFFLOADS) + esw_offloads_cleanup(esw, nvports); + + old_mode = esw->mode; + esw->mode = SRIOV_NONE; + + if (old_mode == SRIOV_OFFLOADS) + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); +} + +int mlx5_eswitch_init(struct mlx5_core_dev *dev) +{ + int total_vports = MLX5_TOTAL_VPORTS(dev); + struct mlx5_eswitch *esw; + int vport_num; + int err; + + if (!MLX5_VPORT_MANAGER(dev)) + return 0; + + esw_info(dev, + "Total vports %d, per vport: max uc(%d) max mc(%d)\n", + total_vports, + MLX5_MAX_UC_PER_VPORT(dev), + MLX5_MAX_MC_PER_VPORT(dev)); + + esw = kzalloc(sizeof(*esw), GFP_KERNEL); + if (!esw) + return -ENOMEM; + + esw->dev = dev; + + esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); + if (!esw->work_queue) { + err = -ENOMEM; + goto abort; + } + + esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport), + GFP_KERNEL); + if (!esw->vports) { + err = -ENOMEM; + goto abort; + } + + err = esw_offloads_init_reps(esw); + if (err) + goto abort; + + hash_init(esw->offloads.encap_tbl); + hash_init(esw->offloads.mod_hdr_tbl); + mutex_init(&esw->state_lock); + + for (vport_num = 0; vport_num < total_vports; vport_num++) { + struct mlx5_vport *vport = &esw->vports[vport_num]; + + vport->vport = vport_num; + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + vport->dev = dev; + INIT_WORK(&vport->vport_change_handler, + esw_vport_change_handler); + } + + esw->total_vports = total_vports; + esw->enabled_vports = 0; + esw->mode = SRIOV_NONE; + esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; + else + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; + + dev->priv.eswitch = esw; + return 0; +abort: + if (esw->work_queue) + destroy_workqueue(esw->work_queue); + esw_offloads_cleanup_reps(esw); + kfree(esw->vports); + kfree(esw); + return err; +} + +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) + return; + + esw_info(esw->dev, "cleanup\n"); + + esw->dev->priv.eswitch = NULL; + destroy_workqueue(esw->work_queue); + esw_offloads_cleanup_reps(esw); + kfree(esw->vports); + kfree(esw); +} + +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) +{ + struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; + u16 vport_num = be16_to_cpu(vc_eqe->vport_num); + struct mlx5_vport *vport; + + if (!esw) { + pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", + vport_num); + return; + } + + vport = &esw->vports[vport_num]; + if (vport->enabled) + queue_work(esw->work_queue, &vport->vport_change_handler); +} + +/* Vport Administration */ +#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) + +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]) +{ + struct mlx5_vport *evport; + u64 node_guid; + int err = 0; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) + return -EINVAL; + + mutex_lock(&esw->state_lock); + evport = &esw->vports[vport]; + + if (evport->info.spoofchk && !is_valid_ether_addr(mac)) + mlx5_core_warn(esw->dev, + "Set invalid MAC while spoofchk is on, vport(%d)\n", + vport); + + err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n", + vport, err); + goto unlock; + } + + node_guid_gen_from_mac(&node_guid, mac); + err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid); + if (err) + mlx5_core_warn(esw->dev, + "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n", + vport, err); + + ether_addr_copy(evport->info.mac, mac); + evport->info.node_guid = node_guid; + if (evport->enabled && esw->mode == SRIOV_LEGACY) + err = esw_vport_ingress_config(esw, evport); + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state) +{ + struct mlx5_vport *evport; + int err = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + mutex_lock(&esw->state_lock); + evport = &esw->vports[vport]; + + err = mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport, link_state); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to set vport %d link state, err = %d", + vport, err); + goto unlock; + } + + evport->info.link_state = link_state; + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct ifla_vf_info *ivi) +{ + struct mlx5_vport *evport; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + evport = &esw->vports[vport]; + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vport - 1; + + mutex_lock(&esw->state_lock); + ether_addr_copy(ivi->mac, evport->info.mac); + ivi->linkstate = evport->info.link_state; + ivi->vlan = evport->info.vlan; + ivi->qos = evport->info.qos; + ivi->spoofchk = evport->info.spoofchk; + ivi->trusted = evport->info.trusted; + ivi->min_tx_rate = evport->info.min_rate; + ivi->max_tx_rate = evport->info.max_rate; + mutex_unlock(&esw->state_lock); + + return 0; +} + +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos, u8 set_flags) +{ + struct mlx5_vport *evport; + int err = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) + return -EINVAL; + + mutex_lock(&esw->state_lock); + evport = &esw->vports[vport]; + + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); + if (err) + goto unlock; + + evport->info.vlan = vlan; + evport->info.qos = qos; + if (evport->enabled && esw->mode == SRIOV_LEGACY) { + err = esw_vport_ingress_config(esw, evport); + if (err) + goto unlock; + err = esw_vport_egress_config(esw, evport); + } + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos) +{ + u8 set_flags = 0; + + if (vlan || qos) + set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; + + return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); +} + +int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, + int vport, bool spoofchk) +{ + struct mlx5_vport *evport; + bool pschk; + int err = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + mutex_lock(&esw->state_lock); + evport = &esw->vports[vport]; + pschk = evport->info.spoofchk; + evport->info.spoofchk = spoofchk; + if (pschk && !is_valid_ether_addr(evport->info.mac)) + mlx5_core_warn(esw->dev, + "Spoofchk in set while MAC is invalid, vport(%d)\n", + evport->vport); + if (evport->enabled && esw->mode == SRIOV_LEGACY) + err = esw_vport_ingress_config(esw, evport); + if (err) + evport->info.spoofchk = pschk; + mutex_unlock(&esw->state_lock); + + return err; +} + +int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, + int vport, bool setting) +{ + struct mlx5_vport *evport; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + mutex_lock(&esw->state_lock); + evport = &esw->vports[vport]; + evport->info.trusted = setting; + if (evport->enabled) + esw_vport_change_handle_locked(evport); + mutex_unlock(&esw->state_lock); + + return 0; +} + +static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_vport *evport; + u32 max_guarantee = 0; + int i; + + for (i = 0; i < esw->total_vports; i++) { + evport = &esw->vports[i]; + if (!evport->enabled || evport->info.min_rate < max_guarantee) + continue; + max_guarantee = evport->info.min_rate; + } + + if (max_guarantee) + return max_t(u32, max_guarantee / fw_max_bw_share, 1); + return 0; +} + +static int normalize_vports_min_rate(struct mlx5_eswitch *esw) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + u32 divider = calculate_vports_min_rate_divider(esw); + struct mlx5_vport *evport; + u32 vport_max_rate; + u32 vport_min_rate; + u32 bw_share; + int err; + int i; + + for (i = 0; i < esw->total_vports; i++) { + evport = &esw->vports[i]; + if (!evport->enabled) + continue; + vport_min_rate = evport->info.min_rate; + vport_max_rate = evport->info.max_rate; + bw_share = 0; + + if (divider) + bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate, + divider, + fw_max_bw_share); + + if (bw_share == evport->qos.bw_share) + continue; + + err = esw_vport_qos_config(esw, i, vport_max_rate, + bw_share); + if (!err) + evport->qos.bw_share = bw_share; + else + return err; + } + + return 0; +} + +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, + u32 max_rate, u32 min_rate) +{ + struct mlx5_vport *evport; + u32 fw_max_bw_share; + u32 previous_min_rate; + bool min_rate_supported; + bool max_rate_supported; + int err = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share >= MLX5_MIN_BW_SHARE; + max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); + + if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported)) + return -EOPNOTSUPP; + + mutex_lock(&esw->state_lock); + evport = &esw->vports[vport]; + + if (min_rate == evport->info.min_rate) + goto set_max_rate; + + previous_min_rate = evport->info.min_rate; + evport->info.min_rate = min_rate; + err = normalize_vports_min_rate(esw); + if (err) { + evport->info.min_rate = previous_min_rate; + goto unlock; + } + +set_max_rate: + if (max_rate == evport->info.max_rate) + goto unlock; + + err = esw_vport_qos_config(esw, vport, max_rate, evport->qos.bw_share); + if (!err) + evport->info.max_rate = max_rate; + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, + int vport_idx, + struct mlx5_vport_drop_stats *stats) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_vport *vport = &esw->vports[vport_idx]; + u64 rx_discard_vport_down, tx_discard_vport_down; + u64 bytes = 0; + int err = 0; + + if (!vport->enabled || esw->mode != SRIOV_LEGACY) + return 0; + + if (vport->egress.drop_counter) + mlx5_fc_query(dev, vport->egress.drop_counter, + &stats->rx_dropped, &bytes); + + if (vport->ingress.drop_counter) + mlx5_fc_query(dev, vport->ingress.drop_counter, + &stats->tx_dropped, &bytes); + + if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) && + !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) + return 0; + + err = mlx5_query_vport_down_stats(dev, vport_idx, + &rx_discard_vport_down, + &tx_discard_vport_down); + if (err) + return err; + + if (MLX5_CAP_GEN(dev, receive_discard_vport_down)) + stats->rx_dropped += rx_discard_vport_down; + if (MLX5_CAP_GEN(dev, transmit_discard_vport_down)) + stats->tx_dropped += tx_discard_vport_down; + + return 0; +} + +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + int vport, + struct ifla_vf_stats *vf_stats) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0}; + struct mlx5_vport_drop_stats stats = {0}; + int err = 0; + u32 *out; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + + memset(out, 0, outlen); + err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); + if (err) + goto free_out; + + #define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) + + memset(vf_stats, 0, sizeof(*vf_stats)); + vf_stats->rx_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets) + + MLX5_GET_CTR(out, received_ib_unicast.packets) + + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_ib_multicast.packets) + + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + vf_stats->rx_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets) + + MLX5_GET_CTR(out, received_ib_unicast.octets) + + MLX5_GET_CTR(out, received_eth_multicast.octets) + + MLX5_GET_CTR(out, received_ib_multicast.octets) + + MLX5_GET_CTR(out, received_eth_broadcast.octets); + + vf_stats->tx_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + + MLX5_GET_CTR(out, transmitted_ib_unicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + + vf_stats->tx_bytes = + MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + + MLX5_GET_CTR(out, transmitted_ib_unicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.octets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + vf_stats->multicast = + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_ib_multicast.packets); + + vf_stats->broadcast = + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + err = mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats); + if (err) + goto free_out; + vf_stats->rx_dropped = stats.rx_dropped; + vf_stats->tx_dropped = stats.tx_dropped; + +free_out: + kvfree(out); + return err; +} + +u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) +{ + return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE; +} +EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h new file mode 100644 index 000000000..c17bfcab5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_ESWITCH_H__ +#define __MLX5_ESWITCH_H__ + +#include <linux/if_ether.h> +#include <linux/if_link.h> +#include <net/devlink.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/eswitch.h> +#include <linux/mlx5/fs.h> +#include "lib/mpfs.h" + +#ifdef CONFIG_MLX5_ESWITCH + +#define MLX5_MAX_UC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) + +#define MLX5_MAX_MC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) + +#define FDB_UPLINK_VPORT 0xffff + +#define MLX5_MIN_BW_SHARE 1 + +#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ + min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit) + +#define mlx5_esw_has_fwd_fdb(dev) \ + MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) + +struct vport_ingress { + struct mlx5_flow_table *acl; + struct mlx5_flow_group *allow_untagged_spoofchk_grp; + struct mlx5_flow_group *allow_spoofchk_only_grp; + struct mlx5_flow_group *allow_untagged_only_grp; + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_handle *allow_rule; + struct mlx5_flow_handle *drop_rule; + struct mlx5_fc *drop_counter; +}; + +struct vport_egress { + struct mlx5_flow_table *acl; + struct mlx5_flow_group *allowed_vlans_grp; + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_handle *allowed_vlan; + struct mlx5_flow_handle *drop_rule; + struct mlx5_fc *drop_counter; +}; + +struct mlx5_vport_drop_stats { + u64 rx_dropped; + u64 tx_dropped; +}; + +struct mlx5_vport_info { + u8 mac[ETH_ALEN]; + u16 vlan; + u8 qos; + u64 node_guid; + int link_state; + u32 min_rate; + u32 max_rate; + bool spoofchk; + bool trusted; +}; + +struct mlx5_vport { + struct mlx5_core_dev *dev; + int vport; + struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct mlx5_flow_handle *promisc_rule; + struct mlx5_flow_handle *allmulti_rule; + struct work_struct vport_change_handler; + + struct vport_ingress ingress; + struct vport_egress egress; + + struct mlx5_vport_info info; + + struct { + bool enabled; + u32 esw_tsar_ix; + u32 bw_share; + } qos; + + bool enabled; + u16 enabled_events; +}; + +struct mlx5_eswitch_fdb { + union { + struct legacy_fdb { + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *addr_grp; + struct mlx5_flow_group *allmulti_grp; + struct mlx5_flow_group *promisc_grp; + } legacy; + + struct offloads_fdb { + struct mlx5_flow_table *fast_fdb; + struct mlx5_flow_table *fwd_fdb; + struct mlx5_flow_table *slow_fdb; + struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *miss_grp; + struct mlx5_flow_handle *miss_rule_uni; + struct mlx5_flow_handle *miss_rule_multi; + int vlan_push_pop_refcount; + } offloads; + }; +}; + +struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_group *vport_rx_group; + struct mlx5_eswitch_rep *vport_reps; + DECLARE_HASHTABLE(encap_tbl, 8); + DECLARE_HASHTABLE(mod_hdr_tbl, 8); + u8 inline_mode; + u64 num_flows; + u8 encap; +}; + +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; +}; + +struct mlx5_eswitch { + struct mlx5_core_dev *dev; + struct mlx5_eswitch_fdb fdb_table; + struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; + struct workqueue_struct *work_queue; + struct mlx5_vport *vports; + int total_vports; + int enabled_vports; + /* Synchronize between vport change events + * and async SRIOV admin state changes + */ + struct mutex state_lock; + struct esw_mc_addr mc_promisc; + + struct { + bool enabled; + u32 root_tsar_id; + } qos; + + struct mlx5_esw_offload offloads; + int mode; +}; + +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); +int esw_offloads_init_reps(struct mlx5_eswitch *esw); + +/* E-Switch API */ +int mlx5_eswitch_init(struct mlx5_core_dev *dev); +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]); +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state); +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos); +int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, + int vport, bool spoofchk); +int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, + int vport_num, bool setting); +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, + u32 max_rate, u32 min_rate); +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct ifla_vf_info *ivi); +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + int vport, + struct ifla_vf_stats *vf_stats); +void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); + +struct mlx5_flow_spec; +struct mlx5_esw_flow_attr; + +struct mlx5_flow_handle * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr); +struct mlx5_flow_handle * +mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr); +void +mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr); + +struct mlx5_flow_handle * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); + +enum { + SET_VLAN_STRIP = BIT(0), + SET_VLAN_INSERT = BIT(1) +}; + +enum mlx5_flow_match_level { + MLX5_MATCH_NONE = MLX5_INLINE_MODE_NONE, + MLX5_MATCH_L2 = MLX5_INLINE_MODE_L2, + MLX5_MATCH_L3 = MLX5_INLINE_MODE_IP, + MLX5_MATCH_L4 = MLX5_INLINE_MODE_TCP_UDP, +}; + +/* current maximum for flow based vport multicasting */ +#define MLX5_MAX_FLOW_FWD_VPORTS 2 + +struct mlx5_esw_flow_attr { + struct mlx5_eswitch_rep *in_rep; + struct mlx5_eswitch_rep *out_rep[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5_core_dev *out_mdev[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5_core_dev *in_mdev; + + int mirror_count; + int out_count; + + int action; + __be16 vlan_proto[MLX5_FS_VLAN_DEPTH]; + u16 vlan_vid[MLX5_FS_VLAN_DEPTH]; + u8 vlan_prio[MLX5_FS_VLAN_DEPTH]; + u8 total_vlan; + bool vlan_handled; + u32 encap_id; + u32 mod_hdr_id; + u8 match_level; + struct mlx5e_tc_flow_parse_attr *parse_attr; +}; + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); +int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode); +int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); +int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap); +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); +void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); + +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr); +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr); +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos, u8 set_flags); + +static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, + u8 vlan_depth) +{ + bool ret = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan); + + if (vlan_depth == 1) + return ret; + + return ret && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan_2) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2); +} + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(dev, format, ...) \ + pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_warn(dev, format, ...) \ + pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) \ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) +#else /* CONFIG_MLX5_ESWITCH */ +/* eswitch API stubs */ +static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} +static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {} +static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } +static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c new file mode 100644 index 000000000..3028e8d90 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -0,0 +1,1368 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "mlx5_core.h" +#include "eswitch.h" + +enum { + FDB_FAST_PATH = 0, + FDB_SLOW_PATH +}; + +struct mlx5_flow_handle * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_table *ft = NULL; + struct mlx5_fc *counter = NULL; + struct mlx5_flow_handle *rule; + int j, i = 0; + void *misc; + + if (esw->mode != SRIOV_OFFLOADS) + return ERR_PTR(-EOPNOTSUPP); + + if (attr->mirror_count) + ft = esw->fdb_table.offloads.fwd_fdb; + else + ft = esw->fdb_table.offloads.fast_fdb; + + flow_act.action = attr->action; + /* if per flow vlan pop/push is emulated, don't set that into the firmware */ + if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) + flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto[0]); + flow_act.vlan[0].vid = attr->vlan_vid[0]; + flow_act.vlan[0].prio = attr->vlan_prio[0]; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + flow_act.vlan[1].ethtype = ntohs(attr->vlan_proto[1]); + flow_act.vlan[1].vid = attr->vlan_vid[1]; + flow_act.vlan[1].prio = attr->vlan_prio[1]; + } + } + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + for (j = attr->mirror_count; j < attr->out_count; j++) { + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = attr->out_rep[j]->vport; + dest[i].vport.vhca_id = + MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); + dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + i++; + } + } + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(counter)) { + rule = ERR_CAST(counter); + goto err_counter_alloc; + } + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[i].counter = counter; + i++; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + if (attr->match_level == MLX5_MATCH_NONE) + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + else + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS; + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + flow_act.modify_id = attr->mod_hdr_id; + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + flow_act.encap_id = attr->encap_id; + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, i); + if (IS_ERR(rule)) + goto err_add_rule; + else + esw->offloads.num_flows++; + + return rule; + +err_add_rule: + mlx5_fc_destroy(esw->dev, counter); +err_counter_alloc: + return rule; +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle *rule; + void *misc; + int i; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + for (i = 0; i < attr->mirror_count; i++) { + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = attr->out_rep[i]->vport; + dest[i].vport.vhca_id = + MLX5_CAP_GEN(attr->out_mdev[i], vhca_id); + dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + } + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = esw->fdb_table.offloads.fwd_fdb, + i++; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + if (attr->match_level == MLX5_MATCH_NONE) + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + else + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS; + + rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fast_fdb, spec, &flow_act, dest, i); + + if (!IS_ERR(rule)) + esw->offloads.num_flows++; + + return rule; +} + +void +mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_fc *counter = NULL; + + counter = mlx5_flow_rule_counter(rule); + mlx5_del_flow_rules(rule); + mlx5_fc_destroy(esw->dev, counter); + esw->offloads.num_flows--; +} + +static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) +{ + struct mlx5_eswitch_rep *rep; + int vf_vport, err = 0; + + esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); + for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { + rep = &esw->offloads.vport_reps[vf_vport]; + if (!rep->rep_if[REP_ETH].valid) + continue; + + err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); + if (err) + goto out; + } + +out: + return err; +} + +static struct mlx5_eswitch_rep * +esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL; + + in_rep = attr->in_rep; + out_rep = attr->out_rep[0]; + + if (push) + vport = in_rep; + else if (pop) + vport = out_rep; + else + vport = in_rep; + + return vport; +} + +static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, + bool push, bool pop, bool fwd) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep; + + if ((push || pop) && !fwd) + goto out_notsupp; + + in_rep = attr->in_rep; + out_rep = attr->out_rep[0]; + + if (push && in_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + if (pop && out_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ + if (!push && !pop && fwd) + if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + /* protects against (1) setting rules with different vlans to push and + * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0) + */ + if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid[0])) + goto out_notsupp; + + return 0; + +out_notsupp: + return -EOPNOTSUPP; +} + +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + /* nop if we're on the vlan push/pop non emulation mode */ + if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) + return 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + err = esw_add_vlan_action_check(attr, push, pop, fwd); + if (err) + return err; + + attr->vlan_handled = false; + + vport = esw_vlan_action_get_vport(attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) { + vport->vlan_refcount++; + attr->vlan_handled = true; + } + + return 0; + } + + if (!push && !pop) + return 0; + + if (!(offloads->vlan_push_pop_refcount)) { + /* it's the 1st vlan rule, apply global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP); + if (err) + goto out; + } + offloads->vlan_push_pop_refcount++; + + if (push) { + if (vport->vlan_refcount) + goto skip_set_push; + + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid[0], 0, + SET_VLAN_INSERT | SET_VLAN_STRIP); + if (err) + goto out; + vport->vlan = attr->vlan_vid[0]; +skip_set_push: + vport->vlan_refcount++; + } +out: + if (!err) + attr->vlan_handled = true; + return err; +} + +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + /* nop if we're on the vlan push/pop non emulation mode */ + if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) + return 0; + + if (!attr->vlan_handled) + return 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + vport = esw_vlan_action_get_vport(attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) + vport->vlan_refcount--; + + return 0; + } + + if (push) { + vport->vlan_refcount--; + if (vport->vlan_refcount) + goto skip_unset_push; + + vport->vlan = 0; + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, + 0, 0, SET_VLAN_STRIP); + if (err) + goto out; + } + +skip_unset_push: + offloads->vlan_push_pop_refcount--; + if (offloads->vlan_push_pop_refcount) + return 0; + + /* no more vlan rules, stop global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, 0); + +out: + return err; +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); + MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = vport; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); +out: + kvfree(spec); + return flow_rule; +} +EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule); + +void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) +{ + mlx5_del_flow_rules(rule); +} + +static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_spec *spec; + void *headers_c; + void *headers_v; + int err = 0; + u8 *dmac_c; + u8 *dmac_v; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, + outer_headers.dmac_47_16); + dmac_c[0] = 0x01; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = 0; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err); + goto out; + } + + esw->fdb_table.offloads.miss_rule_uni = flow_rule; + + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, + outer_headers.dmac_47_16); + dmac_v[0] = 0x01; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); + goto out; + } + + esw->fdb_table.offloads.miss_rule_multi = flow_rule; + +out: + kvfree(spec); + return err; +} + +#define ESW_OFFLOADS_NUM_GROUPS 4 + +static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb = NULL; + int esw_size, err = 0; + u32 flags = 0; + u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + err = -EOPNOTSUPP; + goto out_namespace; + } + + esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), + max_flow_counter, ESW_OFFLOADS_NUM_GROUPS); + + esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS, + 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + if (mlx5_esw_has_fwd_fdb(dev)) + esw_size >>= 1; + + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN; + + fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, + esw_size, + ESW_OFFLOADS_NUM_GROUPS, 0, + flags); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); + goto out_namespace; + } + esw->fdb_table.offloads.fast_fdb = fdb; + + if (!mlx5_esw_has_fwd_fdb(dev)) + goto out_namespace; + + fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, + esw_size, + ESW_OFFLOADS_NUM_GROUPS, 1, + flags); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create fwd table err %d\n", err); + goto out_ft; + } + esw->fdb_table.offloads.fwd_fdb = fdb; + + return err; + +out_ft: + mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb); +out_namespace: + return err; +} + +static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +{ + if (mlx5_esw_has_fwd_fdb(esw->dev)) + mlx5_destroy_flow_table(esw->fdb_table.offloads.fwd_fdb); + mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb); +} + +#define MAX_PF_SQ 256 +#define MAX_SQ_NVPORTS 32 + +static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb = NULL; + int table_size, ix, err = 0; + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + u8 *dmac; + + esw_debug(esw->dev, "Create offloads FDB Tables\n"); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + err = -EOPNOTSUPP; + goto ns_err; + } + + err = esw_create_offloads_fast_fdb_table(esw); + if (err) + goto fast_fdb_err; + + table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2; + + ft_attr.max_fte = table_size; + ft_attr.prio = FDB_SLOW_PATH; + + fdb = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); + goto slow_fdb_err; + } + esw->fdb_table.offloads.slow_fdb = fdb; + + /* create send-to-vport group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + + ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ; + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err); + goto send_vport_err; + } + esw->fdb_table.offloads.send_to_vport_grp = g; + + /* create miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create miss flow group err(%d)\n", err); + goto miss_err; + } + esw->fdb_table.offloads.miss_grp = g; + + err = esw_add_fdb_miss_rule(esw); + if (err) + goto miss_rule_err; + + kvfree(flow_group_in); + return 0; + +miss_rule_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); +miss_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); +send_vport_err: + mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); +slow_fdb_err: + esw_destroy_offloads_fast_fdb_table(esw); +fast_fdb_err: +ns_err: + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.offloads.fast_fdb) + return; + + esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); + + mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); + esw_destroy_offloads_fast_fdb_table(esw); +} + +static int esw_create_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_namespace *ns; + int err = 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -EOPNOTSUPP; + } + + ft_attr.max_fte = dev->priv.sriov.num_vfs + 2; + + ft_offloads = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft_offloads)) { + err = PTR_ERR(ft_offloads); + esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); + return err; + } + + esw->offloads.ft_offloads = ft_offloads; + return 0; +} + +static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + mlx5_destroy_flow_table(offloads->ft_offloads); +} + +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + struct mlx5_priv *priv = &esw->dev->priv; + u32 *flow_group_in; + void *match_criteria, *misc; + int err = 0; + int nvports = priv->sriov.num_vfs + 2; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + /* create vport rx group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); + + g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in); + + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err); + goto out; + } + + esw->offloads.vport_rx_group = g; +out: + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) +{ + mlx5_destroy_flow_group(esw->offloads.vport_rx_group); +} + +struct mlx5_flow_handle * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tirn; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); + goto out; + } + +out: + kvfree(spec); + return flow_rule; +} + +static int esw_offloads_start(struct mlx5_eswitch *esw) +{ + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + + if (esw->mode != SRIOV_LEGACY) { + esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); + return -EINVAL; + } + + mlx5_eswitch_disable_sriov(esw); + err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err1); + } + if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { + if (mlx5_eswitch_inline_mode_get(esw, + num_vfs, + &esw->offloads.inline_mode)) { + esw->offloads.inline_mode = MLX5_INLINE_MODE_L2; + esw_warn(esw->dev, "Inline mode is different between vports\n"); + } + } + return err; +} + +void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) +{ + kfree(esw->offloads.vport_reps); +} + +int esw_offloads_init_reps(struct mlx5_eswitch *esw) +{ + int total_vfs = MLX5_TOTAL_VPORTS(esw->dev); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_esw_offload *offloads; + struct mlx5_eswitch_rep *rep; + u8 hw_id[ETH_ALEN]; + int vport; + + esw->offloads.vport_reps = kcalloc(total_vfs, + sizeof(struct mlx5_eswitch_rep), + GFP_KERNEL); + if (!esw->offloads.vport_reps) + return -ENOMEM; + + offloads = &esw->offloads; + mlx5_query_nic_vport_mac_address(dev, 0, hw_id); + + for (vport = 0; vport < total_vfs; vport++) { + rep = &offloads->vport_reps[vport]; + + rep->vport = vport; + ether_addr_copy(rep->hw_id, hw_id); + } + + offloads->vport_reps[0].vport = FDB_UPLINK_VPORT; + + return 0; +} + +static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int vport; + + for (vport = nvports - 1; vport >= 0; vport--) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->rep_if[rep_type].valid) + continue; + + rep->rep_if[rep_type].unload(rep); + } +} + +static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) +{ + u8 rep_type = NUM_REP_TYPES; + + while (rep_type-- > 0) + esw_offloads_unload_reps_type(esw, nvports, rep_type); +} + +static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int vport; + int err; + + for (vport = 0; vport < nvports; vport++) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->rep_if[rep_type].valid) + continue; + + err = rep->rep_if[rep_type].load(esw->dev, rep); + if (err) + goto err_reps; + } + + return 0; + +err_reps: + esw_offloads_unload_reps_type(esw, vport, rep_type); + return err; +} + +static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports) +{ + u8 rep_type = 0; + int err; + + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { + err = esw_offloads_load_reps_type(esw, nvports, rep_type); + if (err) + goto err_reps; + } + + return err; + +err_reps: + while (rep_type-- > 0) + esw_offloads_unload_reps_type(esw, nvports, rep_type); + return err; +} + +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +{ + int err; + + err = esw_create_offloads_fdb_tables(esw, nvports); + if (err) + return err; + + err = esw_create_offloads_table(esw); + if (err) + goto create_ft_err; + + err = esw_create_vport_rx_group(esw); + if (err) + goto create_fg_err; + + err = esw_offloads_load_reps(esw, nvports); + if (err) + goto err_reps; + + return 0; + +err_reps: + esw_destroy_vport_rx_group(esw); + +create_fg_err: + esw_destroy_offloads_table(esw); + +create_ft_err: + esw_destroy_offloads_fdb_tables(esw); + + return err; +} + +static int esw_offloads_stop(struct mlx5_eswitch *esw) +{ + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + + mlx5_eswitch_disable_sriov(esw); + err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err); + } + + /* enable back PF RoCE */ + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + + return err; +} + +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) +{ + esw_offloads_unload_reps(esw, nvports); + esw_destroy_vport_rx_group(esw); + esw_destroy_offloads_table(esw); + esw_destroy_offloads_fdb_tables(esw); +} + +static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) +{ + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + *mlx5_mode = SRIOV_LEGACY; + break; + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + *mlx5_mode = SRIOV_OFFLOADS; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode) +{ + switch (mlx5_mode) { + case SRIOV_LEGACY: + *mode = DEVLINK_ESWITCH_MODE_LEGACY; + break; + case SRIOV_OFFLOADS: + *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode) +{ + switch (mode) { + case DEVLINK_ESWITCH_INLINE_MODE_NONE: + *mlx5_mode = MLX5_INLINE_MODE_NONE; + break; + case DEVLINK_ESWITCH_INLINE_MODE_LINK: + *mlx5_mode = MLX5_INLINE_MODE_L2; + break; + case DEVLINK_ESWITCH_INLINE_MODE_NETWORK: + *mlx5_mode = MLX5_INLINE_MODE_IP; + break; + case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT: + *mlx5_mode = MLX5_INLINE_MODE_TCP_UDP; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) +{ + switch (mlx5_mode) { + case MLX5_INLINE_MODE_NONE: + *mode = DEVLINK_ESWITCH_INLINE_MODE_NONE; + break; + case MLX5_INLINE_MODE_L2: + *mode = DEVLINK_ESWITCH_INLINE_MODE_LINK; + break; + case MLX5_INLINE_MODE_IP: + *mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK; + break; + case MLX5_INLINE_MODE_TCP_UDP: + *mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int mlx5_devlink_eswitch_check(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -EOPNOTSUPP; + + if(!MLX5_ESWITCH_MANAGER(dev)) + return -EPERM; + + if (dev->priv.eswitch->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + return 0; +} + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u16 cur_mlx5_mode, mlx5_mode = 0; + int err; + + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; + + cur_mlx5_mode = dev->priv.eswitch->mode; + + if (esw_mode_from_devlink(mode, &mlx5_mode)) + return -EINVAL; + + if (cur_mlx5_mode == mlx5_mode) + return 0; + + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + return esw_offloads_start(dev->priv.eswitch); + else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) + return esw_offloads_stop(dev->priv.eswitch); + else + return -EINVAL; +} + +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + int err; + + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; + + return esw_mode_to_devlink(dev->priv.eswitch->mode, mode); +} + +int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + int err, vport; + u8 mlx5_mode; + + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; + + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) + return 0; + /* fall through */ + case MLX5_CAP_INLINE_MODE_L2: + esw_warn(dev, "Inline mode can't be set\n"); + return -EOPNOTSUPP; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + break; + } + + if (esw->offloads.num_flows > 0) { + esw_warn(dev, "Can't set inline mode when flows are configured\n"); + return -EOPNOTSUPP; + } + + err = esw_inline_mode_from_devlink(mode, &mlx5_mode); + if (err) + goto out; + + for (vport = 1; vport < esw->enabled_vports; vport++) { + err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode); + if (err) { + esw_warn(dev, "Failed to set min inline on vport %d\n", + vport); + goto revert_inline_mode; + } + } + + esw->offloads.inline_mode = mlx5_mode; + return 0; + +revert_inline_mode: + while (--vport > 0) + mlx5_modify_nic_vport_min_inline(dev, + vport, + esw->offloads.inline_mode); +out: + return err; +} + +int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + int err; + + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; + + return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); +} + +int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) +{ + u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; + struct mlx5_core_dev *dev = esw->dev; + int vport; + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + mlx5_mode = MLX5_INLINE_MODE_NONE; + goto out; + case MLX5_CAP_INLINE_MODE_L2: + mlx5_mode = MLX5_INLINE_MODE_L2; + goto out; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + goto query_vports; + } + +query_vports: + for (vport = 1; vport <= nvfs; vport++) { + mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); + if (vport > 1 && prev_mlx5_mode != mlx5_mode) + return -EINVAL; + prev_mlx5_mode = mlx5_mode; + } + +out: + *mode = mlx5_mode; + return 0; +} + +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + int err; + + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; + + if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && + (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) + return -EOPNOTSUPP; + + if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) + return -EOPNOTSUPP; + + if (esw->mode == SRIOV_LEGACY) { + esw->offloads.encap = encap; + return 0; + } + + if (esw->offloads.encap == encap) + return 0; + + if (esw->offloads.num_flows > 0) { + esw_warn(dev, "Can't set encapsulation when flows are configured\n"); + return -EOPNOTSUPP; + } + + esw_destroy_offloads_fast_fdb_table(esw); + + esw->offloads.encap = encap; + err = esw_create_offloads_fast_fdb_table(esw); + if (err) { + esw_warn(esw->dev, "Failed re-creating fast FDB table, err %d\n", err); + esw->offloads.encap = !encap; + (void)esw_create_offloads_fast_fdb_table(esw); + } + return err; +} + +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + int err; + + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; + + *encap = esw->offloads.encap; + return 0; +} + +void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + int vport_index, + struct mlx5_eswitch_rep_if *__rep_if, + u8 rep_type) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep_if *rep_if; + + rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type]; + + rep_if->load = __rep_if->load; + rep_if->unload = __rep_if->unload; + rep_if->get_proto_dev = __rep_if->get_proto_dev; + rep_if->priv = __rep_if->priv; + + rep_if->valid = true; +} +EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep); + +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport_index, u8 rep_type) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + rep = &offloads->vport_reps[vport_index]; + + if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled) + rep->rep_if[rep_type].unload(rep); + + rep->rep_if[rep_type].valid = false; +} +EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); + +void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) +{ +#define UPLINK_REP_INDEX 0 + struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + rep = &offloads->vport_reps[UPLINK_REP_INDEX]; + return rep->rep_if[rep_type].priv; +} + +void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, + int vport, + u8 rep_type) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + if (vport == FDB_UPLINK_VPORT) + vport = UPLINK_REP_INDEX; + + rep = &offloads->vport_reps[vport]; + + if (rep->rep_if[rep_type].valid && + rep->rep_if[rep_type].get_proto_dev) + return rep->rep_if[rep_type].get_proto_dev(rep); + return NULL; +} +EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); + +void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) +{ + return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type); +} +EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); + +struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, + int vport) +{ + return &esw->offloads.vport_reps[vport]; +} +EXPORT_SYMBOL(mlx5_eswitch_vport_rep); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile new file mode 100644 index 000000000..d8e17110f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c new file mode 100644 index 000000000..c0fd2212e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/cmd.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> + +#include "mlx5_core.h" +#include "fpga/cmd.h" + +#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \ + MLX5_FPGA_ACCESS_REG_SIZE_MAX) + +int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, + void *buf, bool write) +{ + u32 in[MLX5_FPGA_ACCESS_REG_SZ] = {0}; + u32 out[MLX5_FPGA_ACCESS_REG_SZ]; + int err; + + if (size & 3) + return -EINVAL; + if (addr & 3) + return -EINVAL; + if (size > MLX5_FPGA_ACCESS_REG_SIZE_MAX) + return -EINVAL; + + MLX5_SET(fpga_access_reg, in, size, size); + MLX5_SET64(fpga_access_reg, in, address, addr); + if (write) + memcpy(MLX5_ADDR_OF(fpga_access_reg, in, data), buf, size); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_ACCESS_REG, 0, write); + if (err) + return err; + + if (!write) + memcpy(buf, MLX5_ADDR_OF(fpga_access_reg, out, data), size); + + return 0; +} + +int mlx5_fpga_caps(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0}; + + return mlx5_core_access_reg(dev, in, sizeof(in), dev->caps.fpga, + MLX5_ST_SZ_BYTES(fpga_cap), + MLX5_REG_FPGA_CAP, 0, 0); +} + +int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op) +{ + u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_ctrl)]; + + MLX5_SET(fpga_ctrl, in, operation, op); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_CTRL, 0, true); +} + +int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size) +{ + unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len); + u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr); + unsigned int read; + int ret = 0; + + if (cap_size > size) { + mlx5_core_warn(dev, "Not enough buffer %u for FPGA SBU caps %u", + size, cap_size); + return -EINVAL; + } + + while (cap_size > 0) { + read = min_t(unsigned int, cap_size, + MLX5_FPGA_ACCESS_REG_SIZE_MAX); + + ret = mlx5_fpga_access_reg(dev, read, addr, caps, false); + if (ret) { + mlx5_core_warn(dev, "Error reading FPGA SBU caps %u bytes at address 0x%llx: %d", + read, addr, ret); + return ret; + } + + cap_size -= read; + addr += read; + caps += read; + } + + return ret; +} + +int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query) +{ + u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_ctrl)]; + int err; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_CTRL, 0, false); + if (err) + return err; + + query->status = MLX5_GET(fpga_ctrl, out, status); + query->admin_image = MLX5_GET(fpga_ctrl, out, flash_select_admin); + query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper); + return 0; +} + +int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc, + u32 *fpga_qpn) +{ + u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)]; + int ret; + + MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP); + memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc, + MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_create_qp_out, out, fpga_qpc), + MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc)); + *fpga_qpn = MLX5_GET(fpga_create_qp_out, out, fpga_qpn); + return ret; +} + +int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, + enum mlx5_fpga_qpc_field_select fields, + void *fpga_qpc) +{ + u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_modify_qp_out)]; + + MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP); + MLX5_SET(fpga_modify_qp_in, in, field_select, fields); + MLX5_SET(fpga_modify_qp_in, in, fpga_qpn, fpga_qpn); + memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc, + MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc)); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, + u32 fpga_qpn, void *fpga_qpc) +{ + u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)]; + int ret; + + MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP); + MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_query_qp_out, out, fpga_qpc), + MLX5_FLD_SZ_BYTES(fpga_query_qp_out, fpga_qpc)); + return ret; +} + +int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn) +{ + u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_destroy_qp_out)]; + + MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP); + MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn, + bool clear, struct mlx5_fpga_qp_counters *data) +{ + u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)]; + int ret; + + MLX5_SET(fpga_query_qp_counters_in, in, opcode, + MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS); + MLX5_SET(fpga_query_qp_counters_in, in, clear, clear); + MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + data->rx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_ack_packets); + data->rx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_send_packets); + data->tx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + tx_ack_packets); + data->tx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + tx_send_packets); + data->rx_total_drop = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_total_drop); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h new file mode 100644 index 000000000..eb8b0fe0b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_FPGA_H__ +#define __MLX5_FPGA_H__ + +#include <linux/mlx5/driver.h> + +enum mlx5_fpga_device_id { + MLX5_FPGA_DEVICE_UNKNOWN = 0, + MLX5_FPGA_DEVICE_KU040 = 1, + MLX5_FPGA_DEVICE_KU060 = 2, + MLX5_FPGA_DEVICE_KU060_2 = 3, +}; + +enum mlx5_fpga_image { + MLX5_FPGA_IMAGE_USER = 0, + MLX5_FPGA_IMAGE_FACTORY, +}; + +enum mlx5_fpga_status { + MLX5_FPGA_STATUS_SUCCESS = 0, + MLX5_FPGA_STATUS_FAILURE = 1, + MLX5_FPGA_STATUS_IN_PROGRESS = 2, + MLX5_FPGA_STATUS_NONE = 0xFFFF, +}; + +struct mlx5_fpga_query { + enum mlx5_fpga_image admin_image; + enum mlx5_fpga_image oper_image; + enum mlx5_fpga_status status; +}; + +enum mlx5_fpga_qpc_field_select { + MLX5_FPGA_QPC_STATE = BIT(0), +}; + +struct mlx5_fpga_qp_counters { + u64 rx_ack_packets; + u64 rx_send_packets; + u64 tx_ack_packets; + u64 tx_send_packets; + u64 rx_total_drop; +}; + +int mlx5_fpga_caps(struct mlx5_core_dev *dev); +int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query); +int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op); +int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, + void *buf, bool write); +int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size); + +int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc, + u32 *fpga_qpn); +int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, + enum mlx5_fpga_qpc_field_select fields, void *fpga_qpc); +int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, void *fpga_qpc); +int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn, + bool clear, struct mlx5_fpga_qp_counters *data); +int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn); + +#endif /* __MLX5_FPGA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c new file mode 100644 index 000000000..d8d0b6bd5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -0,0 +1,1047 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <net/addrconf.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/vport.h> + +#include "mlx5_core.h" +#include "lib/mlx5.h" +#include "fpga/conn.h" + +#define MLX5_FPGA_PKEY 0xFFFF +#define MLX5_FPGA_PKEY_INDEX 0 /* RoCE PKEY 0xFFFF is always at index 0 */ +#define MLX5_FPGA_RECV_SIZE 2048 +#define MLX5_FPGA_PORT_NUM 1 +#define MLX5_FPGA_CQ_BUDGET 64 + +static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct device *dma_device; + int err = 0; + + if (unlikely(!buf->sg[0].data)) + goto out; + + dma_device = &conn->fdev->mdev->pdev->dev; + buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data, + buf->sg[0].size, buf->dma_dir); + err = dma_mapping_error(dma_device, buf->sg[0].dma_addr); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, "DMA error on sg 0: %d\n", err); + err = -ENOMEM; + goto out; + } + + if (!buf->sg[1].data) + goto out; + + buf->sg[1].dma_addr = dma_map_single(dma_device, buf->sg[1].data, + buf->sg[1].size, buf->dma_dir); + err = dma_mapping_error(dma_device, buf->sg[1].dma_addr); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, "DMA error on sg 1: %d\n", err); + dma_unmap_single(dma_device, buf->sg[0].dma_addr, + buf->sg[0].size, buf->dma_dir); + err = -ENOMEM; + } + +out: + return err; +} + +static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct device *dma_device; + + dma_device = &conn->fdev->mdev->pdev->dev; + if (buf->sg[1].data) + dma_unmap_single(dma_device, buf->sg[1].dma_addr, + buf->sg[1].size, buf->dma_dir); + + if (likely(buf->sg[0].data)) + dma_unmap_single(dma_device, buf->sg[0].dma_addr, + buf->sg[0].size, buf->dma_dir); +} + +static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_wqe_data_seg *data; + unsigned int ix; + int err = 0; + + err = mlx5_fpga_conn_map_buf(conn, buf); + if (unlikely(err)) + goto out; + + if (unlikely(conn->qp.rq.pc - conn->qp.rq.cc >= conn->qp.rq.size)) { + mlx5_fpga_conn_unmap_buf(conn, buf); + return -EBUSY; + } + + ix = conn->qp.rq.pc & (conn->qp.rq.size - 1); + data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix); + data->byte_count = cpu_to_be32(buf->sg[0].size); + data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key); + data->addr = cpu_to_be64(buf->sg[0].dma_addr); + + conn->qp.rq.pc++; + conn->qp.rq.bufs[ix] = buf; + + /* Make sure that descriptors are written before doorbell record. */ + dma_wmb(); + *conn->qp.wq.rq.db = cpu_to_be32(conn->qp.rq.pc & 0xffff); +out: + return err; +} + +static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe) +{ + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + *conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc); + /* Make sure that doorbell record is visible before ringing */ + wmb(); + mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET, NULL); +} + +static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_data_seg *data; + unsigned int ix, sgi; + int size = 1; + + ix = conn->qp.sq.pc & (conn->qp.sq.size - 1); + + ctrl = mlx5_wq_cyc_get_wqe(&conn->qp.wq.sq, ix); + data = (void *)(ctrl + 1); + + for (sgi = 0; sgi < ARRAY_SIZE(buf->sg); sgi++) { + if (!buf->sg[sgi].data) + break; + data->byte_count = cpu_to_be32(buf->sg[sgi].size); + data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key); + data->addr = cpu_to_be64(buf->sg[sgi].dma_addr); + data++; + size++; + } + + ctrl->imm = 0; + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + ctrl->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) | + MLX5_OPCODE_SEND); + ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.mqp.qpn << 8)); + + conn->qp.sq.pc++; + conn->qp.sq.bufs[ix] = buf; + mlx5_fpga_conn_notify_hw(conn, ctrl); +} + +int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + unsigned long flags; + int err; + + if (!conn->qp.active) + return -ENOTCONN; + + buf->dma_dir = DMA_TO_DEVICE; + err = mlx5_fpga_conn_map_buf(conn, buf); + if (err) + return err; + + spin_lock_irqsave(&conn->qp.sq.lock, flags); + + if (conn->qp.sq.pc - conn->qp.sq.cc >= conn->qp.sq.size) { + list_add_tail(&buf->list, &conn->qp.sq.backlog); + goto out_unlock; + } + + mlx5_fpga_conn_post_send(conn, buf); + +out_unlock: + spin_unlock_irqrestore(&conn->qp.sq.lock, flags); + return err; +} + +static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_dma_buf *buf; + int err; + + buf = kzalloc(sizeof(*buf) + MLX5_FPGA_RECV_SIZE, 0); + if (!buf) + return -ENOMEM; + + buf->sg[0].data = (void *)(buf + 1); + buf->sg[0].size = MLX5_FPGA_RECV_SIZE; + buf->dma_dir = DMA_FROM_DEVICE; + + err = mlx5_fpga_conn_post_recv(conn, buf); + if (err) + kfree(buf); + + return err; +} + +static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + struct mlx5_core_mkey *mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + + kvfree(in); + return err; +} + +static void mlx5_fpga_conn_rq_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe, u8 status) +{ + struct mlx5_fpga_dma_buf *buf; + int ix, err; + + ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.rq.size - 1); + buf = conn->qp.rq.bufs[ix]; + conn->qp.rq.bufs[ix] = NULL; + conn->qp.rq.cc++; + + if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR))) + mlx5_fpga_warn(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + else + mlx5_fpga_dbg(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + + mlx5_fpga_conn_unmap_buf(conn, buf); + + if (unlikely(status || !conn->qp.active)) { + conn->qp.active = false; + kfree(buf); + return; + } + + buf->sg[0].size = be32_to_cpu(cqe->byte_cnt); + mlx5_fpga_dbg(conn->fdev, "Message with %u bytes received successfully\n", + buf->sg[0].size); + conn->recv_cb(conn->cb_arg, buf); + + buf->sg[0].size = MLX5_FPGA_RECV_SIZE; + err = mlx5_fpga_conn_post_recv(conn, buf); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, + "Failed to re-post recv buf: %d\n", err); + kfree(buf); + } +} + +static void mlx5_fpga_conn_sq_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe, u8 status) +{ + struct mlx5_fpga_dma_buf *buf, *nextbuf; + unsigned long flags; + int ix; + + spin_lock_irqsave(&conn->qp.sq.lock, flags); + + ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.sq.size - 1); + buf = conn->qp.sq.bufs[ix]; + conn->qp.sq.bufs[ix] = NULL; + conn->qp.sq.cc++; + + /* Handle backlog still under the spinlock to ensure message post order */ + if (unlikely(!list_empty(&conn->qp.sq.backlog))) { + if (likely(conn->qp.active)) { + nextbuf = list_first_entry(&conn->qp.sq.backlog, + struct mlx5_fpga_dma_buf, list); + list_del(&nextbuf->list); + mlx5_fpga_conn_post_send(conn, nextbuf); + } + } + + spin_unlock_irqrestore(&conn->qp.sq.lock, flags); + + if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR))) + mlx5_fpga_warn(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + else + mlx5_fpga_dbg(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + + mlx5_fpga_conn_unmap_buf(conn, buf); + + if (likely(buf->complete)) + buf->complete(conn, conn->fdev, buf, status); + + if (unlikely(status)) + conn->qp.active = false; +} + +static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe) +{ + u8 opcode, status = 0; + + opcode = cqe->op_own >> 4; + + switch (opcode) { + case MLX5_CQE_REQ_ERR: + status = ((struct mlx5_err_cqe *)cqe)->syndrome; + /* Fall through */ + case MLX5_CQE_REQ: + mlx5_fpga_conn_sq_cqe(conn, cqe, status); + break; + + case MLX5_CQE_RESP_ERR: + status = ((struct mlx5_err_cqe *)cqe)->syndrome; + /* Fall through */ + case MLX5_CQE_RESP_SEND: + mlx5_fpga_conn_rq_cqe(conn, cqe, status); + break; + default: + mlx5_fpga_warn(conn->fdev, "Unexpected cqe opcode %u\n", + opcode); + } +} + +static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn) +{ + mlx5_cq_arm(&conn->cq.mcq, MLX5_CQ_DB_REQ_NOT, + conn->fdev->conn_res.uar->map, conn->cq.wq.cc); +} + +static void mlx5_fpga_conn_cq_event(struct mlx5_core_cq *mcq, + enum mlx5_event event) +{ + struct mlx5_fpga_conn *conn; + + conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq); + mlx5_fpga_warn(conn->fdev, "CQ event %u on CQ #%u\n", event, mcq->cqn); +} + +static void mlx5_fpga_conn_event(struct mlx5_core_qp *mqp, int event) +{ + struct mlx5_fpga_conn *conn; + + conn = container_of(mqp, struct mlx5_fpga_conn, qp.mqp); + mlx5_fpga_warn(conn->fdev, "QP event %u on QP #%u\n", event, mqp->qpn); +} + +static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn, + unsigned int budget) +{ + struct mlx5_cqe64 *cqe; + + while (budget) { + cqe = mlx5_cqwq_get_cqe(&conn->cq.wq); + if (!cqe) + break; + + budget--; + mlx5_cqwq_pop(&conn->cq.wq); + mlx5_fpga_conn_handle_cqe(conn, cqe); + mlx5_cqwq_update_db_record(&conn->cq.wq); + } + if (!budget) { + tasklet_schedule(&conn->cq.tasklet); + return; + } + + mlx5_fpga_dbg(conn->fdev, "Re-arming CQ with cc# %u\n", conn->cq.wq.cc); + /* ensure cq space is freed before enabling more cqes */ + wmb(); + mlx5_fpga_conn_arm_cq(conn); +} + +static void mlx5_fpga_conn_cq_tasklet(unsigned long data) +{ + struct mlx5_fpga_conn *conn = (void *)data; + + if (unlikely(!conn->qp.active)) + return; + mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); +} + +static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq) +{ + struct mlx5_fpga_conn *conn; + + conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq); + if (unlikely(!conn->qp.active)) + return; + mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); +} + +static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0}; + struct mlx5_wq_param wqp; + struct mlx5_cqe64 *cqe; + int inlen, err, eqn; + unsigned int irqn; + void *cqc, *in; + __be64 *pas; + u32 i; + + cq_size = roundup_pow_of_two(cq_size); + MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size)); + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &conn->cq.wq, + &conn->cq.wq_ctrl); + if (err) + return err; + + for (i = 0; i < mlx5_cqwq_get_size(&conn->cq.wq); i++) { + cqe = mlx5_cqwq_get_wqe(&conn->cq.wq, i); + cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; + } + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * conn->cq.wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_cqwq; + } + + err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); + if (err) { + kvfree(in); + goto err_cqwq; + } + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size)); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index); + MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma); + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas); + + err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen); + kvfree(in); + + if (err) + goto err_cqwq; + + conn->cq.mcq.cqe_sz = 64; + conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; + conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; + *conn->cq.mcq.set_ci_db = 0; + *conn->cq.mcq.arm_db = 0; + conn->cq.mcq.vector = 0; + conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; + conn->cq.mcq.event = mlx5_fpga_conn_cq_event; + conn->cq.mcq.irqn = irqn; + conn->cq.mcq.uar = fdev->conn_res.uar; + tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet, + (unsigned long)conn); + + mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn); + + goto out; + +err_cqwq: + mlx5_wq_destroy(&conn->cq.wq_ctrl); +out: + return err; +} + +static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn) +{ + tasklet_disable(&conn->cq.tasklet); + tasklet_kill(&conn->cq.tasklet); + mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq); + mlx5_wq_destroy(&conn->cq.wq_ctrl); +} + +static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + struct mlx5_wq_param wqp; + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + return mlx5_wq_qp_create(mdev, &wqp, qpc, &conn->qp.wq, + &conn->qp.wq_ctrl); +} + +static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn, + unsigned int tx_size, unsigned int rx_size) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {0}; + void *in = NULL, *qpc; + int err, inlen; + + conn->qp.rq.pc = 0; + conn->qp.rq.cc = 0; + conn->qp.rq.size = roundup_pow_of_two(rx_size); + conn->qp.sq.pc = 0; + conn->qp.sq.cc = 0; + conn->qp.sq.size = roundup_pow_of_two(tx_size); + + MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(conn->qp.rq.size)); + MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(conn->qp.sq.size)); + err = mlx5_fpga_conn_create_wq(conn, temp_qpc); + if (err) + goto out; + + conn->qp.rq.bufs = kvcalloc(conn->qp.rq.size, + sizeof(conn->qp.rq.bufs[0]), + GFP_KERNEL); + if (!conn->qp.rq.bufs) { + err = -ENOMEM; + goto err_wq; + } + + conn->qp.sq.bufs = kvcalloc(conn->qp.sq.size, + sizeof(conn->qp.sq.bufs[0]), + GFP_KERNEL); + if (!conn->qp.sq.bufs) { + err = -ENOMEM; + goto err_rq_bufs; + } + + inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * + conn->qp.wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_sq_bufs; + } + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, uar_page, fdev->conn_res.uar->index); + MLX5_SET(qpc, qpc, log_page_size, + conn->qp.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, fdev->conn_res.pdn); + MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(conn->qp.rq.size)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size)); + MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); + MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma); + if (MLX5_CAP_GEN(mdev, cqe_version) == 1) + MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); + + mlx5_fill_page_frag_array(&conn->qp.wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas)); + + err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen); + if (err) + goto err_sq_bufs; + + conn->qp.mqp.event = mlx5_fpga_conn_event; + mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.mqp.qpn); + + goto out; + +err_sq_bufs: + kvfree(conn->qp.sq.bufs); +err_rq_bufs: + kvfree(conn->qp.rq.bufs); +err_wq: + mlx5_wq_destroy(&conn->qp.wq_ctrl); +out: + kvfree(in); + return err; +} + +static void mlx5_fpga_conn_free_recv_bufs(struct mlx5_fpga_conn *conn) +{ + int ix; + + for (ix = 0; ix < conn->qp.rq.size; ix++) { + if (!conn->qp.rq.bufs[ix]) + continue; + mlx5_fpga_conn_unmap_buf(conn, conn->qp.rq.bufs[ix]); + kfree(conn->qp.rq.bufs[ix]); + conn->qp.rq.bufs[ix] = NULL; + } +} + +static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_dma_buf *buf, *temp; + int ix; + + for (ix = 0; ix < conn->qp.sq.size; ix++) { + buf = conn->qp.sq.bufs[ix]; + if (!buf) + continue; + conn->qp.sq.bufs[ix] = NULL; + mlx5_fpga_conn_unmap_buf(conn, buf); + if (!buf->complete) + continue; + buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR); + } + list_for_each_entry_safe(buf, temp, &conn->qp.sq.backlog, list) { + mlx5_fpga_conn_unmap_buf(conn, buf); + if (!buf->complete) + continue; + buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR); + } +} + +static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn) +{ + mlx5_core_destroy_qp(conn->fdev->mdev, &conn->qp.mqp); + mlx5_fpga_conn_free_recv_bufs(conn); + mlx5_fpga_conn_flush_send_bufs(conn); + kvfree(conn->qp.sq.bufs); + kvfree(conn->qp.rq.bufs); + mlx5_wq_destroy(&conn->qp.wq_ctrl); +} + +static inline int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_core_dev *mdev = conn->fdev->mdev; + + mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.mqp.qpn); + + return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, NULL, + &conn->qp.mqp); +} + +static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc = NULL; + int err; + + mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.mqp.qpn); + + qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL); + if (!qpc) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM); + MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn); + MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); + MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma); + + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc, + &conn->qp.mqp); + if (err) { + mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err); + goto out; + } + +out: + kfree(qpc); + return err; +} + +static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc = NULL; + int err; + + mlx5_fpga_dbg(conn->fdev, "QP RTR\n"); + + qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL); + if (!qpc) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES); + MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg)); + MLX5_SET(qpc, qpc, remote_qpn, conn->fpga_qpn); + MLX5_SET(qpc, qpc, next_rcv_psn, + MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn)); + MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM); + ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), + MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32)); + MLX5_SET(qpc, qpc, primary_address_path.udp_sport, + MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port)); + MLX5_SET(qpc, qpc, primary_address_path.src_addr_index, + conn->qp.sgid_index); + MLX5_SET(qpc, qpc, primary_address_path.hop_limit, 0); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), + MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip), + MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip)); + + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc, + &conn->qp.mqp); + if (err) { + mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err); + goto out; + } + +out: + kfree(qpc); + return err; +} + +static inline int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc = NULL; + u32 opt_mask; + int err; + + mlx5_fpga_dbg(conn->fdev, "QP RTS\n"); + + qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL); + if (!qpc) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpc, qpc, log_ack_req_freq, 8); + MLX5_SET(qpc, qpc, min_rnr_nak, 0x12); + MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x12); /* ~1.07s */ + MLX5_SET(qpc, qpc, next_send_psn, + MLX5_GET(fpga_qpc, conn->fpga_qpc, next_rcv_psn)); + MLX5_SET(qpc, qpc, retry_count, 7); + MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */ + + opt_mask = MLX5_QP_OPTPAR_RNR_TIMEOUT; + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, opt_mask, qpc, + &conn->qp.mqp); + if (err) { + mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err); + goto out; + } + +out: + kfree(qpc); + return err; +} + +static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + int err; + + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_ACTIVE); + err = mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn, + MLX5_FPGA_QPC_STATE, &conn->fpga_qpc); + if (err) { + mlx5_fpga_err(fdev, "Failed to activate FPGA RC QP: %d\n", err); + goto out; + } + + err = mlx5_fpga_conn_reset_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state to reset\n"); + goto err_fpga_qp; + } + + err = mlx5_fpga_conn_init_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to modify QP from RESET to INIT\n"); + goto err_fpga_qp; + } + conn->qp.active = true; + + while (!mlx5_fpga_conn_post_recv_buf(conn)) + ; + + err = mlx5_fpga_conn_rtr_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state from INIT to RTR\n"); + goto err_recv_bufs; + } + + err = mlx5_fpga_conn_rts_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state from RTR to RTS\n"); + goto err_recv_bufs; + } + goto out; + +err_recv_bufs: + mlx5_fpga_conn_free_recv_bufs(conn); +err_fpga_qp: + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT); + if (mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn, + MLX5_FPGA_QPC_STATE, &conn->fpga_qpc)) + mlx5_fpga_err(fdev, "Failed to revert FPGA QP to INIT\n"); +out: + return err; +} + +struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr, + enum mlx5_ifc_fpga_qp_type qp_type) +{ + struct mlx5_fpga_conn *ret, *conn; + u8 *remote_mac, *remote_ip; + int err; + + if (!attr->recv_cb) + return ERR_PTR(-EINVAL); + + conn = kzalloc(sizeof(*conn), GFP_KERNEL); + if (!conn) + return ERR_PTR(-ENOMEM); + + conn->fdev = fdev; + INIT_LIST_HEAD(&conn->qp.sq.backlog); + + spin_lock_init(&conn->qp.sq.lock); + + conn->recv_cb = attr->recv_cb; + conn->cb_arg = attr->cb_arg; + + remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32); + err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac); + if (err) { + mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err); + ret = ERR_PTR(err); + goto err; + } + + /* Build Modified EUI-64 IPv6 address from the MAC address */ + remote_ip = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_ip); + remote_ip[0] = 0xfe; + remote_ip[1] = 0x80; + addrconf_addr_eui48(&remote_ip[8], remote_mac); + + err = mlx5_core_reserved_gid_alloc(fdev->mdev, &conn->qp.sgid_index); + if (err) { + mlx5_fpga_err(fdev, "Failed to allocate SGID: %d\n", err); + ret = ERR_PTR(err); + goto err; + } + + err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, + MLX5_ROCE_VERSION_2, + MLX5_ROCE_L3_TYPE_IPV6, + remote_ip, remote_mac, true, 0, + MLX5_FPGA_PORT_NUM); + if (err) { + mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err); + ret = ERR_PTR(err); + goto err_rsvd_gid; + } + mlx5_fpga_dbg(fdev, "Reserved SGID index %u\n", conn->qp.sgid_index); + + /* Allow for one cqe per rx/tx wqe, plus one cqe for the next wqe, + * created during processing of the cqe + */ + err = mlx5_fpga_conn_create_cq(conn, + (attr->tx_size + attr->rx_size) * 2); + if (err) { + mlx5_fpga_err(fdev, "Failed to create CQ: %d\n", err); + ret = ERR_PTR(err); + goto err_gid; + } + + mlx5_fpga_conn_arm_cq(conn); + + err = mlx5_fpga_conn_create_qp(conn, attr->tx_size, attr->rx_size); + if (err) { + mlx5_fpga_err(fdev, "Failed to create QP: %d\n", err); + ret = ERR_PTR(err); + goto err_cq; + } + + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT); + MLX5_SET(fpga_qpc, conn->fpga_qpc, qp_type, qp_type); + MLX5_SET(fpga_qpc, conn->fpga_qpc, st, MLX5_FPGA_QPC_ST_RC); + MLX5_SET(fpga_qpc, conn->fpga_qpc, ether_type, ETH_P_8021Q); + MLX5_SET(fpga_qpc, conn->fpga_qpc, vid, 0); + MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1); + MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0); + MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY); + MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.mqp.qpn); + MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7); + MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7); + + err = mlx5_fpga_create_qp(fdev->mdev, &conn->fpga_qpc, + &conn->fpga_qpn); + if (err) { + mlx5_fpga_err(fdev, "Failed to create FPGA RC QP: %d\n", err); + ret = ERR_PTR(err); + goto err_qp; + } + + err = mlx5_fpga_conn_connect(conn); + if (err) { + ret = ERR_PTR(err); + goto err_conn; + } + + mlx5_fpga_dbg(fdev, "FPGA QPN is %u\n", conn->fpga_qpn); + ret = conn; + goto out; + +err_conn: + mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn); +err_qp: + mlx5_fpga_conn_destroy_qp(conn); +err_cq: + mlx5_fpga_conn_destroy_cq(conn); +err_gid: + mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL, + NULL, false, 0, MLX5_FPGA_PORT_NUM); +err_rsvd_gid: + mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index); +err: + kfree(conn); +out: + return ret; +} + +void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + int err = 0; + + conn->qp.active = false; + tasklet_disable(&conn->cq.tasklet); + synchronize_irq(conn->cq.mcq.irqn); + + mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn); + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, NULL, + &conn->qp.mqp); + if (err) + mlx5_fpga_warn(fdev, "qp_modify 2ERR failed: %d\n", err); + mlx5_fpga_conn_destroy_qp(conn); + mlx5_fpga_conn_destroy_cq(conn); + + mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0, + NULL, NULL, false, 0, MLX5_FPGA_PORT_NUM); + mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index); + kfree(conn); +} + +int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev) +{ + int err; + + err = mlx5_nic_vport_enable_roce(fdev->mdev); + if (err) { + mlx5_fpga_err(fdev, "Failed to enable RoCE: %d\n", err); + goto out; + } + + fdev->conn_res.uar = mlx5_get_uars_page(fdev->mdev); + if (IS_ERR(fdev->conn_res.uar)) { + err = PTR_ERR(fdev->conn_res.uar); + mlx5_fpga_err(fdev, "get_uars_page failed, %d\n", err); + goto err_roce; + } + mlx5_fpga_dbg(fdev, "Allocated UAR index %u\n", + fdev->conn_res.uar->index); + + err = mlx5_core_alloc_pd(fdev->mdev, &fdev->conn_res.pdn); + if (err) { + mlx5_fpga_err(fdev, "alloc pd failed, %d\n", err); + goto err_uar; + } + mlx5_fpga_dbg(fdev, "Allocated PD %u\n", fdev->conn_res.pdn); + + err = mlx5_fpga_conn_create_mkey(fdev->mdev, fdev->conn_res.pdn, + &fdev->conn_res.mkey); + if (err) { + mlx5_fpga_err(fdev, "create mkey failed, %d\n", err); + goto err_dealloc_pd; + } + mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey.key); + + return 0; + +err_dealloc_pd: + mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn); +err_uar: + mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar); +err_roce: + mlx5_nic_vport_disable_roce(fdev->mdev); +out: + return err; +} + +void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev) +{ + mlx5_core_destroy_mkey(fdev->mdev, &fdev->conn_res.mkey); + mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn); + mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar); + mlx5_nic_vport_disable_roce(fdev->mdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h new file mode 100644 index 000000000..634ae10e2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5_FPGA_CONN_H__ +#define __MLX5_FPGA_CONN_H__ + +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> + +#include "fpga/core.h" +#include "fpga/sdk.h" +#include "wq.h" + +struct mlx5_fpga_conn { + struct mlx5_fpga_device *fdev; + + void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf); + void *cb_arg; + + /* FPGA QP */ + u32 fpga_qpc[MLX5_ST_SZ_DW(fpga_qpc)]; + u32 fpga_qpn; + + /* CQ */ + struct { + struct mlx5_cqwq wq; + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_core_cq mcq; + struct tasklet_struct tasklet; + } cq; + + /* QP */ + struct { + bool active; + int sgid_index; + struct mlx5_wq_qp wq; + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_core_qp mqp; + struct { + spinlock_t lock; /* Protects all SQ state */ + unsigned int pc; + unsigned int cc; + unsigned int size; + struct mlx5_fpga_dma_buf **bufs; + struct list_head backlog; + } sq; + struct { + unsigned int pc; + unsigned int cc; + unsigned int size; + struct mlx5_fpga_dma_buf **bufs; + } rq; + } qp; +}; + +int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev); +void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev); +struct mlx5_fpga_conn * +mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr, + enum mlx5_ifc_fpga_qp_type qp_type); +void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn); +int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf); + +#endif /* __MLX5_FPGA_CONN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c new file mode 100644 index 000000000..310f9e7d8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" +#include "lib/mlx5.h" +#include "fpga/core.h" +#include "fpga/conn.h" + +static const char *const mlx5_fpga_error_strings[] = { + "Null Syndrome", + "Corrupted DDR", + "Flash Timeout", + "Internal Link Error", + "Watchdog HW Failure", + "I2C Failure", + "Image Changed", + "Temperature Critical", +}; + +static const char * const mlx5_fpga_qp_error_strings[] = { + "Null Syndrome", + "Retry Counter Expired", + "RNR Expired", +}; +static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void) +{ + struct mlx5_fpga_device *fdev = NULL; + + fdev = kzalloc(sizeof(*fdev), GFP_KERNEL); + if (!fdev) + return NULL; + + spin_lock_init(&fdev->state_lock); + fdev->state = MLX5_FPGA_STATUS_NONE; + return fdev; +} + +static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image) +{ + switch (image) { + case MLX5_FPGA_IMAGE_USER: + return "user"; + case MLX5_FPGA_IMAGE_FACTORY: + return "factory"; + default: + return "unknown"; + } +} + +static const char *mlx5_fpga_device_name(u32 device) +{ + switch (device) { + case MLX5_FPGA_DEVICE_KU040: + return "ku040"; + case MLX5_FPGA_DEVICE_KU060: + return "ku060"; + case MLX5_FPGA_DEVICE_KU060_2: + return "ku060_2"; + case MLX5_FPGA_DEVICE_UNKNOWN: + default: + return "unknown"; + } +} + +static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev) +{ + struct mlx5_fpga_query query; + int err; + + err = mlx5_fpga_query(fdev->mdev, &query); + if (err) { + mlx5_fpga_err(fdev, "Failed to query status: %d\n", err); + return err; + } + + fdev->last_admin_image = query.admin_image; + fdev->last_oper_image = query.oper_image; + + mlx5_fpga_dbg(fdev, "Status %u; Admin image %u; Oper image %u\n", + query.status, query.admin_image, query.oper_image); + + if (query.status != MLX5_FPGA_STATUS_SUCCESS) { + mlx5_fpga_err(fdev, "%s image failed to load; status %u\n", + mlx5_fpga_image_name(fdev->last_oper_image), + query.status); + return -EIO; + } + + return 0; +} + +static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev) +{ + int err; + struct mlx5_core_dev *mdev = fdev->mdev; + + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); + if (err) { + mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err); + return err; + } + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX); + if (err) { + mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err); + return err; + } + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF); + if (err) { + mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err); + return err; + } + return 0; +} + +int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned int max_num_qps; + unsigned long flags; + u32 fpga_device_id; + int err; + + if (!fdev) + return 0; + + err = mlx5_fpga_device_load_check(fdev); + if (err) + goto out; + + err = mlx5_fpga_caps(fdev->mdev); + if (err) + goto out; + + fpga_device_id = MLX5_CAP_FPGA(fdev->mdev, fpga_device); + mlx5_fpga_info(fdev, "%s:%u; %s image, version %u; SBU %06x:%04x version %d\n", + mlx5_fpga_device_name(fpga_device_id), + fpga_device_id, + mlx5_fpga_image_name(fdev->last_oper_image), + MLX5_CAP_FPGA(fdev->mdev, image_version), + MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id), + MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id), + MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version)); + + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); + if (!max_num_qps) { + mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n"); + err = -ENOTSUPP; + goto out; + } + + err = mlx5_core_reserve_gids(mdev, max_num_qps); + if (err) + goto out; + + err = mlx5_fpga_conn_device_init(fdev); + if (err) + goto err_rsvd_gid; + + if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { + err = mlx5_fpga_device_brb(fdev); + if (err) + goto err_conn_init; + } + + goto out; + +err_conn_init: + mlx5_fpga_conn_device_cleanup(fdev); + +err_rsvd_gid: + mlx5_core_unreserve_gids(mdev, max_num_qps); +out: + spin_lock_irqsave(&fdev->state_lock, flags); + fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS; + spin_unlock_irqrestore(&fdev->state_lock, flags); + return err; +} + +int mlx5_fpga_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = NULL; + + if (!MLX5_CAP_GEN(mdev, fpga)) { + mlx5_core_dbg(mdev, "FPGA capability not present\n"); + return 0; + } + + mlx5_core_dbg(mdev, "Initializing FPGA\n"); + + fdev = mlx5_fpga_device_alloc(); + if (!fdev) + return -ENOMEM; + + fdev->mdev = mdev; + mdev->fpga = fdev; + + return 0; +} + +void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned int max_num_qps; + unsigned long flags; + int err; + + if (!fdev) + return; + + spin_lock_irqsave(&fdev->state_lock, flags); + if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) { + spin_unlock_irqrestore(&fdev->state_lock, flags); + return; + } + fdev->state = MLX5_FPGA_STATUS_NONE; + spin_unlock_irqrestore(&fdev->state_lock, flags); + + if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); + if (err) + mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n", + err); + } + + mlx5_fpga_conn_device_cleanup(fdev); + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); + mlx5_core_unreserve_gids(mdev, max_num_qps); +} + +void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + + mlx5_fpga_device_stop(mdev); + kfree(fdev); + mdev->fpga = NULL; +} + +static const char *mlx5_fpga_syndrome_to_string(u8 syndrome) +{ + if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings)) + return mlx5_fpga_error_strings[syndrome]; + return "Unknown"; +} + +static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome) +{ + if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings)) + return mlx5_fpga_qp_error_strings[syndrome]; + return "Unknown"; +} + +void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + const char *event_name; + bool teardown = false; + unsigned long flags; + u8 syndrome; + + switch (event) { + case MLX5_EVENT_TYPE_FPGA_ERROR: + syndrome = MLX5_GET(fpga_error_event, data, syndrome); + event_name = mlx5_fpga_syndrome_to_string(syndrome); + break; + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome); + event_name = mlx5_fpga_qp_syndrome_to_string(syndrome); + break; + default: + mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n", + event); + return; + } + + spin_lock_irqsave(&fdev->state_lock, flags); + switch (fdev->state) { + case MLX5_FPGA_STATUS_SUCCESS: + mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name); + teardown = true; + break; + default: + mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n", + syndrome, event_name); + } + spin_unlock_irqrestore(&fdev->state_lock, flags); + /* We tear-down the card's interfaces and functionality because + * the FPGA bump-on-the-wire is misbehaving and we lose ability + * to communicate with the network. User may still be able to + * recover by re-programming or debugging the FPGA + */ + if (teardown) + mlx5_trigger_health_work(fdev->mdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h new file mode 100644 index 000000000..3e2355c8d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_FPGA_CORE_H__ +#define __MLX5_FPGA_CORE_H__ + +#ifdef CONFIG_MLX5_FPGA + +#include "fpga/cmd.h" + +/* Represents an Innova device */ +struct mlx5_fpga_device { + struct mlx5_core_dev *mdev; + spinlock_t state_lock; /* Protects state transitions */ + enum mlx5_fpga_status state; + enum mlx5_fpga_image last_admin_image; + enum mlx5_fpga_image last_oper_image; + + /* QP Connection resources */ + struct { + u32 pdn; + struct mlx5_core_mkey mkey; + struct mlx5_uars_page *uar; + } conn_res; + + struct mlx5_fpga_ipsec *ipsec; + struct mlx5_fpga_tls *tls; +}; + +#define mlx5_fpga_dbg(__adev, format, ...) \ + dev_dbg(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) + +#define mlx5_fpga_err(__adev, format, ...) \ + dev_err(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) + +#define mlx5_fpga_warn(__adev, format, ...) \ + dev_warn(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) + +#define mlx5_fpga_warn_ratelimited(__adev, format, ...) \ + dev_warn_ratelimited(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d: " \ + format, __func__, __LINE__, ##__VA_ARGS__) + +#define mlx5_fpga_notice(__adev, format, ...) \ + dev_notice(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__) + +#define mlx5_fpga_info(__adev, format, ...) \ + dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__) + +int mlx5_fpga_init(struct mlx5_core_dev *mdev); +void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev); +int mlx5_fpga_device_start(struct mlx5_core_dev *mdev); +void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev); +void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data); + +#else + +static inline int mlx5_fpga_init(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev) +{ +} + +static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) +{ +} + +static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, + void *data) +{ +} + +#endif + +#endif /* __MLX5_FPGA_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c new file mode 100644 index 000000000..715ccafc9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -0,0 +1,1533 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/rhashtable.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/fs_helpers.h> +#include <linux/mlx5/fs.h> +#include <linux/rbtree.h> + +#include "mlx5_core.h" +#include "fs_cmd.h" +#include "fpga/ipsec.h" +#include "fpga/sdk.h" +#include "fpga/core.h" + +enum mlx5_fpga_ipsec_cmd_status { + MLX5_FPGA_IPSEC_CMD_PENDING, + MLX5_FPGA_IPSEC_CMD_SEND_FAIL, + MLX5_FPGA_IPSEC_CMD_COMPLETE, +}; + +struct mlx5_fpga_ipsec_cmd_context { + struct mlx5_fpga_dma_buf buf; + enum mlx5_fpga_ipsec_cmd_status status; + struct mlx5_ifc_fpga_ipsec_cmd_resp resp; + int status_code; + struct completion complete; + struct mlx5_fpga_device *dev; + struct list_head list; /* Item in pending_cmds */ + u8 command[0]; +}; + +struct mlx5_fpga_esp_xfrm; + +struct mlx5_fpga_ipsec_sa_ctx { + struct rhash_head hash; + struct mlx5_ifc_fpga_ipsec_sa hw_sa; + struct mlx5_core_dev *dev; + struct mlx5_fpga_esp_xfrm *fpga_xfrm; +}; + +struct mlx5_fpga_esp_xfrm { + unsigned int num_rules; + struct mlx5_fpga_ipsec_sa_ctx *sa_ctx; + struct mutex lock; /* xfrm lock */ + struct mlx5_accel_esp_xfrm accel_xfrm; +}; + +struct mlx5_fpga_ipsec_rule { + struct rb_node node; + struct fs_fte *fte; + struct mlx5_fpga_ipsec_sa_ctx *ctx; +}; + +static const struct rhashtable_params rhash_sa = { + /* Keep out "cmd" field from the key as it's + * value is not constant during the lifetime + * of the key object. + */ + .key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) - + FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), + .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) + + FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), + .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +struct mlx5_fpga_ipsec { + struct mlx5_fpga_device *fdev; + struct list_head pending_cmds; + spinlock_t pending_cmds_lock; /* Protects pending_cmds */ + u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)]; + struct mlx5_fpga_conn *conn; + + struct notifier_block fs_notifier_ingress_bypass; + struct notifier_block fs_notifier_egress; + + /* Map hardware SA --> SA context + * (mlx5_fpga_ipsec_sa) (mlx5_fpga_ipsec_sa_ctx) + * We will use this hash to avoid SAs duplication in fpga which + * aren't allowed + */ + struct rhashtable sa_hash; /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */ + struct mutex sa_hash_lock; + + /* Tree holding all rules for this fpga device + * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id) + */ + struct rb_root rules_rb; + struct mutex rules_rb_lock; /* rules lock */ +}; + +static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) +{ + if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) + return false; + + if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) != + MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX) + return false; + + if (MLX5_CAP_FPGA(mdev, sandbox_product_id) != + MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC) + return false; + + return true; +} + +static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *buf, + u8 status) +{ + struct mlx5_fpga_ipsec_cmd_context *context; + + if (status) { + context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context, + buf); + mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n", + status); + context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL; + complete(&context->complete); + } +} + +static inline +int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome) +{ + switch (syndrome) { + case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS: + return 0; + case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE: + return -EEXIST; + case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST: + return -EINVAL; + case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE: + return -EIO; + } + return -EIO; +} + +static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data; + struct mlx5_fpga_ipsec_cmd_context *context; + enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome; + struct mlx5_fpga_device *fdev = cb_arg; + unsigned long flags; + + if (buf->sg[0].size < sizeof(*resp)) { + mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n", + buf->sg[0].size, sizeof(*resp)); + return; + } + + mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n", + ntohl(resp->syndrome)); + + spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); + context = list_first_entry_or_null(&fdev->ipsec->pending_cmds, + struct mlx5_fpga_ipsec_cmd_context, + list); + if (context) + list_del(&context->list); + spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); + + if (!context) { + mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n"); + return; + } + mlx5_fpga_dbg(fdev, "Handling response for %p\n", context); + + syndrome = ntohl(resp->syndrome); + context->status_code = syndrome_to_errno(syndrome); + context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE; + memcpy(&context->resp, resp, sizeof(*resp)); + + if (context->status_code) + mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n", + syndrome); + + complete(&context->complete); +} + +static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev, + const void *cmd, int cmd_size) +{ + struct mlx5_fpga_ipsec_cmd_context *context; + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned long flags; + int res; + + if (!fdev || !fdev->ipsec) + return ERR_PTR(-EOPNOTSUPP); + + if (cmd_size & 3) + return ERR_PTR(-EINVAL); + + context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC); + if (!context) + return ERR_PTR(-ENOMEM); + + context->status = MLX5_FPGA_IPSEC_CMD_PENDING; + context->dev = fdev; + context->buf.complete = mlx5_fpga_ipsec_send_complete; + init_completion(&context->complete); + memcpy(&context->command, cmd, cmd_size); + context->buf.sg[0].size = cmd_size; + context->buf.sg[0].data = &context->command; + + spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); + res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf); + if (!res) + list_add_tail(&context->list, &fdev->ipsec->pending_cmds); + spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); + + if (res) { + mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res); + kfree(context); + return ERR_PTR(res); + } + + /* Context should be freed by the caller after completion. */ + return context; +} + +static int mlx5_fpga_ipsec_cmd_wait(void *ctx) +{ + struct mlx5_fpga_ipsec_cmd_context *context = ctx; + unsigned long timeout = + msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC); + int res; + + res = wait_for_completion_timeout(&context->complete, timeout); + if (!res) { + mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n"); + return -ETIMEDOUT; + } + + if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE) + res = context->status_code; + else + res = -EIO; + + return res; +} + +static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec) +{ + if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command)) + return true; + return false; +} + +static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev, + struct mlx5_ifc_fpga_ipsec_sa *hw_sa, + int opcode) +{ + struct mlx5_core_dev *dev = fdev->mdev; + struct mlx5_ifc_fpga_ipsec_sa *sa; + struct mlx5_fpga_ipsec_cmd_context *cmd_context; + size_t sa_cmd_size; + int err; + + hw_sa->ipsec_sa_v1.cmd = htonl(opcode); + if (is_v2_sadb_supported(fdev->ipsec)) + sa_cmd_size = sizeof(*hw_sa); + else + sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1); + + cmd_context = (struct mlx5_fpga_ipsec_cmd_context *) + mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size); + if (IS_ERR(cmd_context)) + return PTR_ERR(cmd_context); + + err = mlx5_fpga_ipsec_cmd_wait(cmd_context); + if (err) + goto out; + + sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command; + if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) { + mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n", + ntohl(sa->ipsec_sa_v1.sw_sa_handle), + ntohl(cmd_context->resp.sw_sa_handle)); + err = -EIO; + } + +out: + kfree(cmd_context); + return err; +} + +u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + u32 ret = 0; + + if (mlx5_fpga_is_ipsec_device(mdev)) { + ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE; + ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA; + } else { + return ret; + } + + if (!fdev->ipsec) + return ret; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp)) + ret |= MLX5_ACCEL_IPSEC_CAP_ESP; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6)) + ret |= MLX5_ACCEL_IPSEC_CAP_IPV6; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso)) + ret |= MLX5_ACCEL_IPSEC_CAP_LSO; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer)) + ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) { + ret |= MLX5_ACCEL_IPSEC_CAP_ESN; + ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN; + } + + return ret; +} + +unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + + if (!fdev || !fdev->ipsec) + return 0; + + return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, + number_of_ipsec_counters); +} + +int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, + unsigned int counters_count) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned int i; + __be32 *data; + u32 count; + u64 addr; + int ret; + + if (!fdev || !fdev->ipsec) + return 0; + + addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, + ipsec_counters_addr_low) + + ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, + ipsec_counters_addr_high) << 32); + + count = mlx5_fpga_ipsec_counters_count(mdev); + + data = kzalloc(array3_size(sizeof(*data), count, 2), GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto out; + } + + ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data, + MLX5_FPGA_ACCESS_TYPE_DONTCARE); + if (ret < 0) { + mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n", + ret); + goto out; + } + ret = 0; + + if (count > counters_count) + count = counters_count; + + /* Each counter is low word, then high. But each word is big-endian */ + for (i = 0; i < count; i++) + counters[i] = (u64)ntohl(data[i * 2]) | + ((u64)ntohl(data[i * 2 + 1]) << 32); + +out: + kfree(data); + return ret; +} + +static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags) +{ + struct mlx5_fpga_ipsec_cmd_context *context; + struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0}; + int err; + + cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP); + cmd.flags = htonl(flags); + context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd)); + if (IS_ERR(context)) + return PTR_ERR(context); + + err = mlx5_fpga_ipsec_cmd_wait(context); + if (err) + goto out; + + if ((context->resp.flags & cmd.flags) != cmd.flags) { + mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n", + cmd.flags, + context->resp.flags); + err = -EIO; + } + +out: + kfree(context); + return err; +} + +static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev) +{ + u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev); + u32 flags = 0; + + if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER) + flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER; + + return mlx5_fpga_ipsec_set_caps(mdev, flags); +} + +static void +mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs, + struct mlx5_ifc_fpga_ipsec_sa *hw_sa) +{ + const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm; + + /* key */ + memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key, + aes_gcm->key_len / 8); + /* Duplicate 128 bit key twice according to HW layout */ + if (aes_gcm->key_len == 128) + memcpy(&hw_sa->ipsec_sa_v1.key_enc[16], + aes_gcm->aes_key, aes_gcm->key_len / 8); + + /* salt and seq_iv */ + memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv, + sizeof(aes_gcm->seq_iv)); + memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt, + sizeof(aes_gcm->salt)); + + /* esn */ + if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) { + hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN; + hw_sa->ipsec_sa_v1.flags |= + (xfrm_attrs->flags & + MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ? + MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0; + hw_sa->esn = htonl(xfrm_attrs->esn); + } else { + hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN; + hw_sa->ipsec_sa_v1.flags &= + ~(xfrm_attrs->flags & + MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ? + MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0; + hw_sa->esn = 0; + } + + /* rx handle */ + hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle); + + /* enc mode */ + switch (aes_gcm->key_len) { + case 128: + hw_sa->ipsec_sa_v1.enc_mode = + MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128; + break; + case 256: + hw_sa->ipsec_sa_v1.enc_mode = + MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128; + break; + } + + /* flags */ + hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID | + MLX5_FPGA_IPSEC_SA_SPI_EN | + MLX5_FPGA_IPSEC_SA_IP_ESP; + + if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT) + hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX; + else + hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX; +} + +static void +mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6, + struct mlx5_ifc_fpga_ipsec_sa *hw_sa) +{ + mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa); + + /* IPs */ + memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip)); + memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip)); + + /* SPI */ + hw_sa->ipsec_sa_v1.spi = spi; + + /* flags */ + if (is_ipv6) + hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6; +} + +static bool is_full_mask(const void *p, size_t len) +{ + WARN_ON(len % 4); + + return !memchr_inv(p, 0xff, len); +} + +static bool validate_fpga_full_mask(struct mlx5_core_dev *dev, + const u32 *match_c, + const u32 *match_v) +{ + const void *misc_params_c = MLX5_ADDR_OF(fte_match_param, + match_c, + misc_parameters); + const void *headers_c = MLX5_ADDR_OF(fte_match_param, + match_c, + outer_headers); + const void *headers_v = MLX5_ADDR_OF(fte_match_param, + match_v, + outer_headers); + + if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) { + const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, + headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, + headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout, + ipv4)) || + !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout, + ipv4))) + return false; + } else { + const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, + headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6); + const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, + headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6); + + if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)) || + !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6))) + return false; + } + + if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, + outer_esp_spi), + MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi))) + return false; + + return true; +} + +static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev, + u8 match_criteria_enable, + const u32 *match_c, + const u32 *match_v) +{ + u32 ipsec_dev_caps = mlx5_accel_ipsec_device_caps(dev); + bool ipv6_flow; + + ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v); + + if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) || + mlx5_fs_is_outer_udp_flow(match_c, match_v) || + mlx5_fs_is_outer_tcp_flow(match_c, match_v) || + mlx5_fs_is_vxlan_flow(match_c) || + !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) || + ipv6_flow)) + return false; + + if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE)) + return false; + + if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) && + mlx5_fs_is_outer_ipsec_flow(match_c)) + return false; + + if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) && + ipv6_flow) + return false; + + if (!validate_fpga_full_mask(dev, match_c, match_v)) + return false; + + return true; +} + +static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, + u8 match_criteria_enable, + const u32 *match_c, + const u32 *match_v, + struct mlx5_flow_act *flow_act) +{ + const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) || + MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0); + bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) || + MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0); + int ret; + + ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c, + match_v); + if (!ret) + return ret; + + if (is_dmac || is_smac || + (match_criteria_enable & + ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || + (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || + flow_act->has_flow_tag) + return false; + + return true; +} + +void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *accel_xfrm, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6) +{ + struct mlx5_fpga_ipsec_sa_ctx *sa_ctx; + struct mlx5_fpga_esp_xfrm *fpga_xfrm = + container_of(accel_xfrm, typeof(*fpga_xfrm), + accel_xfrm); + struct mlx5_fpga_device *fdev = mdev->fpga; + struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; + int opcode, err; + void *context; + + /* alloc SA */ + sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL); + if (!sa_ctx) + return ERR_PTR(-ENOMEM); + + sa_ctx->dev = mdev; + + /* build candidate SA */ + mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs, + saddr, daddr, spi, is_ipv6, + &sa_ctx->hw_sa); + + mutex_lock(&fpga_xfrm->lock); + + if (fpga_xfrm->sa_ctx) { /* multiple rules for same accel_xfrm */ + /* all rules must be with same IPs and SPI */ + if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa, + sizeof(sa_ctx->hw_sa))) { + context = ERR_PTR(-EINVAL); + goto exists; + } + + ++fpga_xfrm->num_rules; + context = fpga_xfrm->sa_ctx; + goto exists; + } + + /* This is unbounded fpga_xfrm, try to add to hash */ + mutex_lock(&fipsec->sa_hash_lock); + + err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash, + rhash_sa); + if (err) { + /* Can't bound different accel_xfrm to already existing sa_ctx. + * This is because we can't support multiple ketmats for + * same IPs and SPI + */ + context = ERR_PTR(-EEXIST); + goto unlock_hash; + } + + /* Bound accel_xfrm to sa_ctx */ + opcode = is_v2_sadb_supported(fdev->ipsec) ? + MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 : + MLX5_FPGA_IPSEC_CMD_OP_ADD_SA; + err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode); + sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; + if (err) { + context = ERR_PTR(err); + goto delete_hash; + } + + mutex_unlock(&fipsec->sa_hash_lock); + + ++fpga_xfrm->num_rules; + fpga_xfrm->sa_ctx = sa_ctx; + sa_ctx->fpga_xfrm = fpga_xfrm; + + mutex_unlock(&fpga_xfrm->lock); + + return sa_ctx; + +delete_hash: + WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash, + rhash_sa)); +unlock_hash: + mutex_unlock(&fipsec->sa_hash_lock); + +exists: + mutex_unlock(&fpga_xfrm->lock); + kfree(sa_ctx); + return context; +} + +static void * +mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev, + struct fs_fte *fte, + bool is_egress) +{ + struct mlx5_accel_esp_xfrm *accel_xfrm; + __be32 saddr[4], daddr[4], spi; + struct mlx5_flow_group *fg; + bool is_ipv6 = false; + + fs_get_obj(fg, fte->node.parent); + /* validate */ + if (is_egress && + !mlx5_is_fpga_egress_ipsec_rule(mdev, + fg->mask.match_criteria_enable, + fg->mask.match_criteria, + fte->val, + &fte->action)) + return ERR_PTR(-EINVAL); + else if (!mlx5_is_fpga_ipsec_rule(mdev, + fg->mask.match_criteria_enable, + fg->mask.match_criteria, + fte->val)) + return ERR_PTR(-EINVAL); + + /* get xfrm context */ + accel_xfrm = + (struct mlx5_accel_esp_xfrm *)fte->action.esp_id; + + /* IPs */ + if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria, + fte->val)) { + memcpy(&saddr[3], + MLX5_ADDR_OF(fte_match_set_lyr_2_4, + fte->val, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + sizeof(saddr[3])); + memcpy(&daddr[3], + MLX5_ADDR_OF(fte_match_set_lyr_2_4, + fte->val, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + sizeof(daddr[3])); + } else { + memcpy(saddr, + MLX5_ADDR_OF(fte_match_param, + fte->val, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(saddr)); + memcpy(daddr, + MLX5_ADDR_OF(fte_match_param, + fte->val, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(daddr)); + is_ipv6 = true; + } + + /* SPI */ + spi = MLX5_GET_BE(typeof(spi), + fte_match_param, fte->val, + misc_parameters.outer_esp_spi); + + /* create */ + return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm, + saddr, daddr, + spi, is_ipv6); +} + +static void +mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx) +{ + struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga; + struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; + int opcode = is_v2_sadb_supported(fdev->ipsec) ? + MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 : + MLX5_FPGA_IPSEC_CMD_OP_DEL_SA; + int err; + + err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode); + sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; + if (err) { + WARN_ON(err); + return; + } + + mutex_lock(&fipsec->sa_hash_lock); + WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash, + rhash_sa)); + mutex_unlock(&fipsec->sa_hash_lock); +} + +void mlx5_fpga_ipsec_delete_sa_ctx(void *context) +{ + struct mlx5_fpga_esp_xfrm *fpga_xfrm = + ((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm; + + mutex_lock(&fpga_xfrm->lock); + if (!--fpga_xfrm->num_rules) { + mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx); + kfree(fpga_xfrm->sa_ctx); + fpga_xfrm->sa_ctx = NULL; + } + mutex_unlock(&fpga_xfrm->lock); +} + +static inline struct mlx5_fpga_ipsec_rule * +_rule_search(struct rb_root *root, struct fs_fte *fte) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct mlx5_fpga_ipsec_rule *rule = + container_of(node, struct mlx5_fpga_ipsec_rule, + node); + + if (rule->fte < fte) + node = node->rb_left; + else if (rule->fte > fte) + node = node->rb_right; + else + return rule; + } + return NULL; +} + +static struct mlx5_fpga_ipsec_rule * +rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte) +{ + struct mlx5_fpga_ipsec_rule *rule; + + mutex_lock(&ipsec_dev->rules_rb_lock); + rule = _rule_search(&ipsec_dev->rules_rb, fte); + mutex_unlock(&ipsec_dev->rules_rb_lock); + + return rule; +} + +static inline int _rule_insert(struct rb_root *root, + struct mlx5_fpga_ipsec_rule *rule) +{ + struct rb_node **new = &root->rb_node, *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct mlx5_fpga_ipsec_rule *this = + container_of(*new, struct mlx5_fpga_ipsec_rule, + node); + + parent = *new; + if (rule->fte < this->fte) + new = &((*new)->rb_left); + else if (rule->fte > this->fte) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&rule->node, parent, new); + rb_insert_color(&rule->node, root); + + return 0; +} + +static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev, + struct mlx5_fpga_ipsec_rule *rule) +{ + int ret; + + mutex_lock(&ipsec_dev->rules_rb_lock); + ret = _rule_insert(&ipsec_dev->rules_rb, rule); + mutex_unlock(&ipsec_dev->rules_rb_lock); + + return ret; +} + +static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev, + struct mlx5_fpga_ipsec_rule *rule) +{ + struct rb_root *root = &ipsec_dev->rules_rb; + + mutex_lock(&ipsec_dev->rules_rb_lock); + rb_erase(&rule->node, root); + mutex_unlock(&ipsec_dev->rules_rb_lock); +} + +static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev, + struct mlx5_fpga_ipsec_rule *rule) +{ + _rule_delete(ipsec_dev, rule); + kfree(rule); +} + +struct mailbox_mod { + uintptr_t saved_esp_id; + u32 saved_action; + u32 saved_outer_esp_spi_value; +}; + +static void restore_spec_mailbox(struct fs_fte *fte, + struct mailbox_mod *mbox_mod) +{ + char *misc_params_v = MLX5_ADDR_OF(fte_match_param, + fte->val, + misc_parameters); + + MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, + mbox_mod->saved_outer_esp_spi_value); + fte->action.action |= mbox_mod->saved_action; + fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id; +} + +static void modify_spec_mailbox(struct mlx5_core_dev *mdev, + struct fs_fte *fte, + struct mailbox_mod *mbox_mod) +{ + char *misc_params_v = MLX5_ADDR_OF(fte_match_param, + fte->val, + misc_parameters); + + mbox_mod->saved_esp_id = fte->action.esp_id; + mbox_mod->saved_action = fte->action.action & + (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT); + mbox_mod->saved_outer_esp_spi_value = + MLX5_GET(fte_match_set_misc, misc_params_v, + outer_esp_spi); + + fte->action.esp_id = 0; + fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT); + if (!MLX5_CAP_FLOWTABLE(mdev, + flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) + MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0); +} + +static enum fs_flow_table_type egress_to_fs_ft(bool egress) +{ + return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX; +} + +static int fpga_ipsec_fs_create_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id, + bool is_egress) +{ + int (*create_flow_group)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, u32 *in, + unsigned int *group_id) = + mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group; + char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in, + match_criteria.misc_parameters); + u32 saved_outer_esp_spi_mask; + u8 match_criteria_enable; + int ret; + + if (MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) + return create_flow_group(dev, ft, in, group_id); + + match_criteria_enable = + MLX5_GET(create_flow_group_in, in, match_criteria_enable); + saved_outer_esp_spi_mask = + MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi); + if (!match_criteria_enable || !saved_outer_esp_spi_mask) + return create_flow_group(dev, ft, in, group_id); + + MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0); + + if (!(*misc_params_c) && + !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1)) + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS); + + ret = create_flow_group(dev, ft, in, group_id); + + MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask); + MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable); + + return ret; +} + +static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + struct fs_fte *fte, + bool is_egress) +{ + int (*create_fte)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + struct fs_fte *fte) = + mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte; + struct mlx5_fpga_device *fdev = dev->fpga; + struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; + struct mlx5_fpga_ipsec_rule *rule; + bool is_esp = fte->action.esp_id; + struct mailbox_mod mbox_mod; + int ret; + + if (!is_esp || + !(fte->action.action & + (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) + return create_fte(dev, ft, fg, fte); + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return -ENOMEM; + + rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress); + if (IS_ERR(rule->ctx)) { + int err = PTR_ERR(rule->ctx); + kfree(rule); + return err; + } + + rule->fte = fte; + WARN_ON(rule_insert(fipsec, rule)); + + modify_spec_mailbox(dev, fte, &mbox_mod); + ret = create_fte(dev, ft, fg, fte); + restore_spec_mailbox(fte, &mbox_mod); + if (ret) { + _rule_delete(fipsec, rule); + mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx); + kfree(rule); + } + + return ret; +} + +static int fpga_ipsec_fs_update_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte, + bool is_egress) +{ + int (*update_fte)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte) = + mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte; + bool is_esp = fte->action.esp_id; + struct mailbox_mod mbox_mod; + int ret; + + if (!is_esp || + !(fte->action.action & + (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) + return update_fte(dev, ft, group_id, modify_mask, fte); + + modify_spec_mailbox(dev, fte, &mbox_mod); + ret = update_fte(dev, ft, group_id, modify_mask, fte); + restore_spec_mailbox(fte, &mbox_mod); + + return ret; +} + +static int fpga_ipsec_fs_delete_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte, + bool is_egress) +{ + int (*delete_fte)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte) = + mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte; + struct mlx5_fpga_device *fdev = dev->fpga; + struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; + struct mlx5_fpga_ipsec_rule *rule; + bool is_esp = fte->action.esp_id; + struct mailbox_mod mbox_mod; + int ret; + + if (!is_esp || + !(fte->action.action & + (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) + return delete_fte(dev, ft, fte); + + rule = rule_search(fipsec, fte); + if (!rule) + return -ENOENT; + + mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx); + rule_delete(fipsec, rule); + + modify_spec_mailbox(dev, fte, &mbox_mod); + ret = delete_fte(dev, ft, fte); + restore_spec_mailbox(fte, &mbox_mod); + + return ret; +} + +static int +mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id) +{ + return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, true); +} + +static int +mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + struct fs_fte *fte) +{ + return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, true); +} + +static int +mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte) +{ + return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte, + true); +} + +static int +mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + return fpga_ipsec_fs_delete_fte(dev, ft, fte, true); +} + +static int +mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id) +{ + return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, false); +} + +static int +mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + struct fs_fte *fte) +{ + return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, false); +} + +static int +mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte) +{ + return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte, + false); +} + +static int +mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + return fpga_ipsec_fs_delete_fte(dev, ft, fte, false); +} + +static struct mlx5_flow_cmds fpga_ipsec_ingress; +static struct mlx5_flow_cmds fpga_ipsec_egress; + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) +{ + switch (type) { + case FS_FT_NIC_RX: + return &fpga_ipsec_ingress; + case FS_FT_NIC_TX: + return &fpga_ipsec_egress; + default: + WARN_ON(true); + return NULL; + } +} + +int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_conn_attr init_attr = {0}; + struct mlx5_fpga_device *fdev = mdev->fpga; + struct mlx5_fpga_conn *conn; + int err; + + if (!mlx5_fpga_is_ipsec_device(mdev)) + return 0; + + fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL); + if (!fdev->ipsec) + return -ENOMEM; + + fdev->ipsec->fdev = fdev; + + err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps), + fdev->ipsec->caps); + if (err) { + mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n", + err); + goto error; + } + + INIT_LIST_HEAD(&fdev->ipsec->pending_cmds); + spin_lock_init(&fdev->ipsec->pending_cmds_lock); + + init_attr.rx_size = SBU_QP_QUEUE_SIZE; + init_attr.tx_size = SBU_QP_QUEUE_SIZE; + init_attr.recv_cb = mlx5_fpga_ipsec_recv; + init_attr.cb_arg = fdev; + conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr); + if (IS_ERR(conn)) { + err = PTR_ERR(conn); + mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n", + err); + goto error; + } + fdev->ipsec->conn = conn; + + err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa); + if (err) + goto err_destroy_conn; + mutex_init(&fdev->ipsec->sa_hash_lock); + + fdev->ipsec->rules_rb = RB_ROOT; + mutex_init(&fdev->ipsec->rules_rb_lock); + + err = mlx5_fpga_ipsec_enable_supported_caps(mdev); + if (err) { + mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n", + err); + goto err_destroy_hash; + } + + return 0; + +err_destroy_hash: + rhashtable_destroy(&fdev->ipsec->sa_hash); + +err_destroy_conn: + mlx5_fpga_sbu_conn_destroy(conn); + +error: + kfree(fdev->ipsec); + fdev->ipsec = NULL; + return err; +} + +static void destroy_rules_rb(struct rb_root *root) +{ + struct mlx5_fpga_ipsec_rule *r, *tmp; + + rbtree_postorder_for_each_entry_safe(r, tmp, root, node) { + rb_erase(&r->node, root); + mlx5_fpga_ipsec_delete_sa_ctx(r->ctx); + kfree(r); + } +} + +void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + + if (!mlx5_fpga_is_ipsec_device(mdev)) + return; + + destroy_rules_rb(&fdev->ipsec->rules_rb); + rhashtable_destroy(&fdev->ipsec->sa_hash); + + mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn); + kfree(fdev->ipsec); + fdev->ipsec = NULL; +} + +void mlx5_fpga_ipsec_build_fs_cmds(void) +{ + /* ingress */ + fpga_ipsec_ingress.create_flow_table = + mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table; + fpga_ipsec_ingress.destroy_flow_table = + mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table; + fpga_ipsec_ingress.modify_flow_table = + mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table; + fpga_ipsec_ingress.create_flow_group = + mlx5_fpga_ipsec_fs_create_flow_group_ingress; + fpga_ipsec_ingress.destroy_flow_group = + mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group; + fpga_ipsec_ingress.create_fte = + mlx5_fpga_ipsec_fs_create_fte_ingress; + fpga_ipsec_ingress.update_fte = + mlx5_fpga_ipsec_fs_update_fte_ingress; + fpga_ipsec_ingress.delete_fte = + mlx5_fpga_ipsec_fs_delete_fte_ingress; + fpga_ipsec_ingress.update_root_ft = + mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft; + + /* egress */ + fpga_ipsec_egress.create_flow_table = + mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table; + fpga_ipsec_egress.destroy_flow_table = + mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table; + fpga_ipsec_egress.modify_flow_table = + mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table; + fpga_ipsec_egress.create_flow_group = + mlx5_fpga_ipsec_fs_create_flow_group_egress; + fpga_ipsec_egress.destroy_flow_group = + mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group; + fpga_ipsec_egress.create_fte = + mlx5_fpga_ipsec_fs_create_fte_egress; + fpga_ipsec_egress.update_fte = + mlx5_fpga_ipsec_fs_update_fte_egress; + fpga_ipsec_egress.delete_fte = + mlx5_fpga_ipsec_fs_delete_fte_egress; + fpga_ipsec_egress.update_root_ft = + mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft; +} + +static int +mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + if (attrs->tfc_pad) { + mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n"); + return -EOPNOTSUPP; + } + + if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) { + mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n"); + return -EOPNOTSUPP; + } + + if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) { + mlx5_core_err(mdev, "Only aes gcm keymat is supported\n"); + return -EOPNOTSUPP; + } + + if (attrs->keymat.aes_gcm.iv_algo != + MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) { + mlx5_core_err(mdev, "Only iv sequence algo is supported\n"); + return -EOPNOTSUPP; + } + + if (attrs->keymat.aes_gcm.icv_len != 128) { + mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n"); + return -EOPNOTSUPP; + } + + if (attrs->keymat.aes_gcm.key_len != 128 && + attrs->keymat.aes_gcm.key_len != 256) { + mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); + return -EOPNOTSUPP; + } + + if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) && + (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps, + v2_command))) { + mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +struct mlx5_accel_esp_xfrm * +mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 flags) +{ + struct mlx5_fpga_esp_xfrm *fpga_xfrm; + + if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) { + mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n"); + return ERR_PTR(-EINVAL); + } + + if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { + mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL); + if (!fpga_xfrm) + return ERR_PTR(-ENOMEM); + + mutex_init(&fpga_xfrm->lock); + memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs, + sizeof(fpga_xfrm->accel_xfrm.attrs)); + + return &fpga_xfrm->accel_xfrm; +} + +void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) +{ + struct mlx5_fpga_esp_xfrm *fpga_xfrm = + container_of(xfrm, struct mlx5_fpga_esp_xfrm, + accel_xfrm); + /* assuming no sa_ctx are connected to this xfrm_ctx */ + kfree(fpga_xfrm); +} + +int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + struct mlx5_core_dev *mdev = xfrm->mdev; + struct mlx5_fpga_device *fdev = mdev->fpga; + struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; + struct mlx5_fpga_esp_xfrm *fpga_xfrm; + struct mlx5_ifc_fpga_ipsec_sa org_hw_sa; + + int err = 0; + + if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs))) + return 0; + + if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { + mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); + return -EOPNOTSUPP; + } + + if (is_v2_sadb_supported(fipsec)) { + mlx5_core_warn(mdev, "Modify esp is not supported\n"); + return -EOPNOTSUPP; + } + + fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm); + + mutex_lock(&fpga_xfrm->lock); + + if (!fpga_xfrm->sa_ctx) + /* Unbounded xfrm, chane only sw attrs */ + goto change_sw_xfrm_attrs; + + /* copy original hw sa */ + memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa)); + mutex_lock(&fipsec->sa_hash_lock); + /* remove original hw sa from hash */ + WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, + &fpga_xfrm->sa_ctx->hash, rhash_sa)); + /* update hw_sa with new xfrm attrs*/ + mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs, + &fpga_xfrm->sa_ctx->hw_sa); + /* try to insert new hw_sa to hash */ + err = rhashtable_insert_fast(&fipsec->sa_hash, + &fpga_xfrm->sa_ctx->hash, rhash_sa); + if (err) + goto rollback_sa; + + /* modify device with new hw_sa */ + err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa, + MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2); + fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; + if (err) + WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, + &fpga_xfrm->sa_ctx->hash, + rhash_sa)); +rollback_sa: + if (err) { + /* return original hw_sa to hash */ + memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa, + sizeof(org_hw_sa)); + WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash, + &fpga_xfrm->sa_ctx->hash, + rhash_sa)); + } + mutex_unlock(&fipsec->sa_hash_lock); + +change_sw_xfrm_attrs: + if (!err) + memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs)); + mutex_unlock(&fpga_xfrm->lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h new file mode 100644 index 000000000..2b5e63b0d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5_FPGA_IPSEC_H__ +#define __MLX5_FPGA_IPSEC_H__ + +#include "accel/ipsec.h" +#include "fs_cmd.h" + +#ifdef CONFIG_MLX5_FPGA + +u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); +unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev); +int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, + unsigned int counters_count); + +void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *accel_xfrm, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6); +void mlx5_fpga_ipsec_delete_sa_ctx(void *context); + +int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev); +void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev); +void mlx5_fpga_ipsec_build_fs_cmds(void); + +struct mlx5_accel_esp_xfrm * +mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 flags); +void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm); +int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, + const struct mlx5_accel_esp_xfrm_attrs *attrs); + +const struct mlx5_flow_cmds * +mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type); + +#else + +static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline unsigned int +mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, + u64 *counters) +{ + return 0; +} + +static inline void * +mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *accel_xfrm, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6) +{ + return NULL; +} + +static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context) +{ +} + +static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) +{ +} + +static inline void mlx5_fpga_ipsec_build_fs_cmds(void) +{ +} + +static inline struct mlx5_accel_esp_xfrm * +mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 flags) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) +{ +} + +static inline int +mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + return -EOPNOTSUPP; +} + +static inline const struct mlx5_flow_cmds * +mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) +{ + return mlx5_fs_cmd_get_default(type); +} + +#endif /* CONFIG_MLX5_FPGA */ + +#endif /* __MLX5_FPGA_SADB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c new file mode 100644 index 000000000..14962969c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/mlx5/device.h> + +#include "fpga/core.h" +#include "fpga/conn.h" +#include "fpga/sdk.h" + +struct mlx5_fpga_conn * +mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr) +{ + return mlx5_fpga_conn_create(fdev, attr, MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_create); + +void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn) +{ + mlx5_fpga_conn_destroy(conn); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_destroy); + +int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + return mlx5_fpga_conn_send(conn, buf); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_sendmsg); + +static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size, + u64 addr, u8 *buf) +{ + size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX; + size_t bytes_done = 0; + u8 actual_size; + int err; + + if (!size) + return -EINVAL; + + if (!fdev->mdev) + return -ENOTCONN; + + while (bytes_done < size) { + actual_size = min(max_size, (size - bytes_done)); + + err = mlx5_fpga_access_reg(fdev->mdev, actual_size, + addr + bytes_done, + buf + bytes_done, false); + if (err) { + mlx5_fpga_err(fdev, "Failed to read over I2C: %d\n", + err); + break; + } + + bytes_done += actual_size; + } + + return err; +} + +static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size, + u64 addr, u8 *buf) +{ + size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX; + size_t bytes_done = 0; + u8 actual_size; + int err; + + if (!size) + return -EINVAL; + + if (!fdev->mdev) + return -ENOTCONN; + + while (bytes_done < size) { + actual_size = min(max_size, (size - bytes_done)); + + err = mlx5_fpga_access_reg(fdev->mdev, actual_size, + addr + bytes_done, + buf + bytes_done, true); + if (err) { + mlx5_fpga_err(fdev, "Failed to write FPGA crspace\n"); + break; + } + + bytes_done += actual_size; + } + + return err; +} + +int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type) +{ + int ret; + + switch (access_type) { + case MLX5_FPGA_ACCESS_TYPE_I2C: + ret = mlx5_fpga_mem_read_i2c(fdev, size, addr, buf); + if (ret) + return ret; + break; + default: + mlx5_fpga_warn(fdev, "Unexpected read access_type %u\n", + access_type); + return -EACCES; + } + + return size; +} +EXPORT_SYMBOL(mlx5_fpga_mem_read); + +int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type) +{ + int ret; + + switch (access_type) { + case MLX5_FPGA_ACCESS_TYPE_I2C: + ret = mlx5_fpga_mem_write_i2c(fdev, size, addr, buf); + if (ret) + return ret; + break; + default: + mlx5_fpga_warn(fdev, "Unexpected write access_type %u\n", + access_type); + return -EACCES; + } + + return size; +} +EXPORT_SYMBOL(mlx5_fpga_mem_write); + +int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf) +{ + return mlx5_fpga_sbu_caps(fdev->mdev, buf, size); +} +EXPORT_SYMBOL(mlx5_fpga_get_sbu_caps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h new file mode 100644 index 000000000..656f96be6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef MLX5_FPGA_SDK_H +#define MLX5_FPGA_SDK_H + +#include <linux/types.h> +#include <linux/dma-direction.h> + +/** + * DOC: Innova SDK + * This header defines the in-kernel API for Innova FPGA client drivers. + */ +#define SBU_QP_QUEUE_SIZE 8 +#define MLX5_FPGA_CMD_TIMEOUT_MSEC (60 * 1000) + +/** + * enum mlx5_fpga_access_type - Enumerated the different methods possible for + * accessing the device memory address space + */ +enum mlx5_fpga_access_type { + /** Use the slow CX-FPGA I2C bus */ + MLX5_FPGA_ACCESS_TYPE_I2C = 0x0, + /** Use the fastest available method */ + MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0, +}; + +struct mlx5_fpga_conn; +struct mlx5_fpga_device; + +/** + * struct mlx5_fpga_dma_entry - A scatter-gather DMA entry + */ +struct mlx5_fpga_dma_entry { + /** @data: Virtual address pointer to the data */ + void *data; + /** @size: Size in bytes of the data */ + unsigned int size; + /** @dma_addr: Private member. Physical DMA-mapped address of the data */ + dma_addr_t dma_addr; +}; + +/** + * struct mlx5_fpga_dma_buf - A packet buffer + * May contain up to 2 scatter-gather data entries + */ +struct mlx5_fpga_dma_buf { + /** @dma_dir: DMA direction */ + enum dma_data_direction dma_dir; + /** @sg: Scatter-gather entries pointing to the data in memory */ + struct mlx5_fpga_dma_entry sg[2]; + /** @list: Item in SQ backlog, for TX packets */ + struct list_head list; + /** + * @complete: Completion routine, for TX packets + * @conn: FPGA Connection this packet was sent to + * @fdev: FPGA device this packet was sent to + * @buf: The packet buffer + * @status: 0 if successful, or an error code otherwise + */ + void (*complete)(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *buf, u8 status); +}; + +/** + * struct mlx5_fpga_conn_attr - FPGA connection attributes + * Describes the attributes of a connection + */ +struct mlx5_fpga_conn_attr { + /** @tx_size: Size of connection TX queue, in packets */ + unsigned int tx_size; + /** @rx_size: Size of connection RX queue, in packets */ + unsigned int rx_size; + /** + * @recv_cb: Callback function which is called for received packets + * @cb_arg: The value provided in mlx5_fpga_conn_attr.cb_arg + * @buf: A buffer containing a received packet + * + * buf is guaranteed to only contain a single scatter-gather entry. + * The size of the actual packet received is specified in buf.sg[0].size + * When this callback returns, the packet buffer may be re-used for + * subsequent receives. + */ + void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf); + void *cb_arg; +}; + +/** + * mlx5_fpga_sbu_conn_create() - Initialize a new FPGA SBU connection + * @fdev: The FPGA device + * @attr: Attributes of the new connection + * + * Sets up a new FPGA SBU connection with the specified attributes. + * The receive callback function may be called for incoming messages even + * before this function returns. + * + * The caller must eventually destroy the connection by calling + * mlx5_fpga_sbu_conn_destroy. + * + * Return: A new connection, or ERR_PTR() error value otherwise. + */ +struct mlx5_fpga_conn * +mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr); + +/** + * mlx5_fpga_sbu_conn_destroy() - Destroy an FPGA SBU connection + * @conn: The FPGA SBU connection to destroy + * + * Cleans up an FPGA SBU connection which was previously created with + * mlx5_fpga_sbu_conn_create. + */ +void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn); + +/** + * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet + * @fdev: An FPGA SBU connection + * @buf: The packet buffer + * + * Queues a packet for transmission over an FPGA SBU connection. + * The buffer should not be modified or freed until completion. + * Upon completion, the buf's complete() callback is invoked, indicating the + * success or error status of the transmission. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf); + +/** + * mlx5_fpga_mem_read() - Read from FPGA memory address space + * @fdev: The FPGA device + * @size: Size of chunk to read, in bytes + * @addr: Starting address to read from, in FPGA address space + * @buf: Buffer to read into + * @access_type: Method for reading + * + * Reads from the specified address into the specified buffer. + * The address may point to configuration space or to DDR. + * Large reads may be performed internally as several non-atomic operations. + * This function may sleep, so should not be called from atomic contexts. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type); + +/** + * mlx5_fpga_mem_write() - Write to FPGA memory address space + * @fdev: The FPGA device + * @size: Size of chunk to write, in bytes + * @addr: Starting address to write to, in FPGA address space + * @buf: Buffer which contains data to write + * @access_type: Method for writing + * + * Writes the specified buffer data to FPGA memory at the specified address. + * The address may point to configuration space or to DDR. + * Large writes may be performed internally as several non-atomic operations. + * This function may sleep, so should not be called from atomic contexts. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type); + +/** + * mlx5_fpga_get_sbu_caps() - Read the SBU capabilities + * @fdev: The FPGA device + * @size: Size of the buffer to read into + * @buf: Buffer to read the capabilities into + * + * Reads the FPGA SBU capabilities into the specified buffer. + * The format of the capabilities buffer is SBU-dependent. + * + * Return: 0 if successful + * -EINVAL if the buffer is not large enough to contain SBU caps + * or any other error value otherwise. + */ +int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf); + +#endif /* MLX5_FPGA_SDK_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c new file mode 100644 index 000000000..22a2ef111 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -0,0 +1,622 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/mlx5/device.h> +#include "fpga/tls.h" +#include "fpga/cmd.h" +#include "fpga/sdk.h" +#include "fpga/core.h" +#include "accel/tls.h" + +struct mlx5_fpga_tls_command_context; + +typedef void (*mlx5_fpga_tls_command_complete) + (struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev, + struct mlx5_fpga_tls_command_context *ctx, + struct mlx5_fpga_dma_buf *resp); + +struct mlx5_fpga_tls_command_context { + struct list_head list; + /* There is no guarantee on the order between the TX completion + * and the command response. + * The TX completion is going to touch cmd->buf even in + * the case of successful transmission. + * So instead of requiring separate allocations for cmd + * and cmd->buf we've decided to use a reference counter + */ + refcount_t ref; + struct mlx5_fpga_dma_buf buf; + mlx5_fpga_tls_command_complete complete; +}; + +static void +mlx5_fpga_tls_put_command_ctx(struct mlx5_fpga_tls_command_context *ctx) +{ + if (refcount_dec_and_test(&ctx->ref)) + kfree(ctx); +} + +static void mlx5_fpga_tls_cmd_complete(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *resp) +{ + struct mlx5_fpga_conn *conn = fdev->tls->conn; + struct mlx5_fpga_tls_command_context *ctx; + struct mlx5_fpga_tls *tls = fdev->tls; + unsigned long flags; + + spin_lock_irqsave(&tls->pending_cmds_lock, flags); + ctx = list_first_entry(&tls->pending_cmds, + struct mlx5_fpga_tls_command_context, list); + list_del(&ctx->list); + spin_unlock_irqrestore(&tls->pending_cmds_lock, flags); + ctx->complete(conn, fdev, ctx, resp); +} + +static void mlx5_fpga_cmd_send_complete(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *buf, + u8 status) +{ + struct mlx5_fpga_tls_command_context *ctx = + container_of(buf, struct mlx5_fpga_tls_command_context, buf); + + mlx5_fpga_tls_put_command_ctx(ctx); + + if (unlikely(status)) + mlx5_fpga_tls_cmd_complete(fdev, NULL); +} + +static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_tls_command_context *cmd, + mlx5_fpga_tls_command_complete complete) +{ + struct mlx5_fpga_tls *tls = fdev->tls; + unsigned long flags; + int ret; + + refcount_set(&cmd->ref, 2); + cmd->complete = complete; + cmd->buf.complete = mlx5_fpga_cmd_send_complete; + + spin_lock_irqsave(&tls->pending_cmds_lock, flags); + /* mlx5_fpga_sbu_conn_sendmsg is called under pending_cmds_lock + * to make sure commands are inserted to the tls->pending_cmds list + * and the command QP in the same order. + */ + ret = mlx5_fpga_sbu_conn_sendmsg(tls->conn, &cmd->buf); + if (likely(!ret)) + list_add_tail(&cmd->list, &tls->pending_cmds); + else + complete(tls->conn, fdev, cmd, NULL); + spin_unlock_irqrestore(&tls->pending_cmds_lock, flags); +} + +/* Start of context identifiers range (inclusive) */ +#define SWID_START 0 +/* End of context identifiers range (exclusive) */ +#define SWID_END BIT(24) + +static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock, + void *ptr) +{ + unsigned long flags; + int ret; + + /* TLS metadata format is 1 byte for syndrome followed + * by 3 bytes of swid (software ID) + * swid must not exceed 3 bytes. + * See tls_rxtx.c:insert_pet() for details + */ + BUILD_BUG_ON((SWID_END - 1) & 0xFF000000); + + idr_preload(GFP_KERNEL); + spin_lock_irqsave(idr_spinlock, flags); + ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC); + spin_unlock_irqrestore(idr_spinlock, flags); + idr_preload_end(); + + return ret; +} + +static void *mlx5_fpga_tls_release_swid(struct idr *idr, + spinlock_t *idr_spinlock, u32 swid) +{ + unsigned long flags; + void *ptr; + + spin_lock_irqsave(idr_spinlock, flags); + ptr = idr_remove(idr, swid); + spin_unlock_irqrestore(idr_spinlock, flags); + return ptr; +} + +static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *buf, u8 status) +{ + kfree(buf); +} + +static void +mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_tls_command_context *cmd, + struct mlx5_fpga_dma_buf *resp) +{ + if (resp) { + u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); + + if (syndrome) + mlx5_fpga_err(fdev, + "Teardown stream failed with syndrome = %d", + syndrome); + } + mlx5_fpga_tls_put_command_ctx(cmd); +} + +static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd) +{ + memcpy(MLX5_ADDR_OF(tls_cmd, cmd, src_port), flow, + MLX5_BYTE_OFF(tls_flow, ipv6)); + + MLX5_SET(tls_cmd, cmd, ipv6, MLX5_GET(tls_flow, flow, ipv6)); + MLX5_SET(tls_cmd, cmd, direction_sx, + MLX5_GET(tls_flow, flow, direction_sx)); +} + +int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, + u64 rcd_sn) +{ + struct mlx5_fpga_dma_buf *buf; + int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE; + void *flow; + void *cmd; + int ret; + + buf = kzalloc(size, GFP_ATOMIC); + if (!buf) + return -ENOMEM; + + cmd = (buf + 1); + + rcu_read_lock(); + flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); + if (unlikely(!flow)) { + rcu_read_unlock(); + WARN_ONCE(1, "Received NULL pointer for handle\n"); + kfree(buf); + return -EINVAL; + } + mlx5_fpga_tls_flow_to_cmd(flow, cmd); + rcu_read_unlock(); + + MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); + MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn)); + MLX5_SET(tls_cmd, cmd, tcp_sn, seq); + MLX5_SET(tls_cmd, cmd, command_type, CMD_RESYNC_RX); + + buf->sg[0].data = cmd; + buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; + buf->complete = mlx_tls_kfree_complete; + + ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf); + if (ret < 0) + kfree(buf); + + return ret; +} + +static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, + void *flow, u32 swid, gfp_t flags) +{ + struct mlx5_fpga_tls_command_context *ctx; + struct mlx5_fpga_dma_buf *buf; + void *cmd; + + ctx = kzalloc(sizeof(*ctx) + MLX5_TLS_COMMAND_SIZE, flags); + if (!ctx) + return; + + buf = &ctx->buf; + cmd = (ctx + 1); + MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM); + MLX5_SET(tls_cmd, cmd, swid, swid); + + mlx5_fpga_tls_flow_to_cmd(flow, cmd); + kfree(flow); + + buf->sg[0].data = cmd; + buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; + + mlx5_fpga_tls_cmd_send(mdev->fpga, ctx, + mlx5_fpga_tls_teardown_completion); +} + +void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, + gfp_t flags, bool direction_sx) +{ + struct mlx5_fpga_tls *tls = mdev->fpga->tls; + void *flow; + + if (direction_sx) + flow = mlx5_fpga_tls_release_swid(&tls->tx_idr, + &tls->tx_idr_spinlock, + swid); + else + flow = mlx5_fpga_tls_release_swid(&tls->rx_idr, + &tls->rx_idr_spinlock, + swid); + + if (!flow) { + mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n", + swid); + return; + } + + synchronize_rcu(); /* before kfree(flow) */ + mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags); +} + +enum mlx5_fpga_setup_stream_status { + MLX5_FPGA_CMD_PENDING, + MLX5_FPGA_CMD_SEND_FAILED, + MLX5_FPGA_CMD_RESPONSE_RECEIVED, + MLX5_FPGA_CMD_ABANDONED, +}; + +struct mlx5_setup_stream_context { + struct mlx5_fpga_tls_command_context cmd; + atomic_t status; + u32 syndrome; + struct completion comp; +}; + +static void +mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_tls_command_context *cmd, + struct mlx5_fpga_dma_buf *resp) +{ + struct mlx5_setup_stream_context *ctx = + container_of(cmd, struct mlx5_setup_stream_context, cmd); + int status = MLX5_FPGA_CMD_SEND_FAILED; + void *tls_cmd = ctx + 1; + + /* If we failed to send to command resp == NULL */ + if (resp) { + ctx->syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); + status = MLX5_FPGA_CMD_RESPONSE_RECEIVED; + } + + status = atomic_xchg_release(&ctx->status, status); + if (likely(status != MLX5_FPGA_CMD_ABANDONED)) { + complete(&ctx->comp); + return; + } + + mlx5_fpga_err(fdev, "Command was abandoned, syndrome = %u\n", + ctx->syndrome); + + if (!ctx->syndrome) { + /* The process was killed while waiting for the context to be + * added, and the add completed successfully. + * We need to destroy the HW context, and we can't can't reuse + * the command context because we might not have received + * the tx completion yet. + */ + mlx5_fpga_tls_del_flow(fdev->mdev, + MLX5_GET(tls_cmd, tls_cmd, swid), + GFP_ATOMIC, + MLX5_GET(tls_cmd, tls_cmd, + direction_sx)); + } + + mlx5_fpga_tls_put_command_ctx(cmd); +} + +static int mlx5_fpga_tls_setup_stream_cmd(struct mlx5_core_dev *mdev, + struct mlx5_setup_stream_context *ctx) +{ + struct mlx5_fpga_dma_buf *buf; + void *cmd = ctx + 1; + int status, ret = 0; + + buf = &ctx->cmd.buf; + buf->sg[0].data = cmd; + buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; + MLX5_SET(tls_cmd, cmd, command_type, CMD_SETUP_STREAM); + + init_completion(&ctx->comp); + atomic_set(&ctx->status, MLX5_FPGA_CMD_PENDING); + ctx->syndrome = -1; + + mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, + mlx5_fpga_tls_setup_completion); + wait_for_completion_killable(&ctx->comp); + + status = atomic_xchg_acquire(&ctx->status, MLX5_FPGA_CMD_ABANDONED); + if (unlikely(status == MLX5_FPGA_CMD_PENDING)) + /* ctx is going to be released in mlx5_fpga_tls_setup_completion */ + return -EINTR; + + if (unlikely(ctx->syndrome)) + ret = -ENOMEM; + + mlx5_fpga_tls_put_command_ctx(&ctx->cmd); + return ret; +} + +static void mlx5_fpga_tls_hw_qp_recv_cb(void *cb_arg, + struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_fpga_device *fdev = (struct mlx5_fpga_device *)cb_arg; + + mlx5_fpga_tls_cmd_complete(fdev, buf); +} + +bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev) +{ + if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) + return false; + + if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) != + MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX) + return false; + + if (MLX5_CAP_FPGA(mdev, sandbox_product_id) != + MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS) + return false; + + if (MLX5_CAP_FPGA(mdev, sandbox_product_version) != 0) + return false; + + return true; +} + +static int mlx5_fpga_tls_get_caps(struct mlx5_fpga_device *fdev, + u32 *p_caps) +{ + int err, cap_size = MLX5_ST_SZ_BYTES(tls_extended_cap); + u32 caps = 0; + void *buf; + + buf = kzalloc(cap_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + err = mlx5_fpga_get_sbu_caps(fdev, cap_size, buf); + if (err) + goto out; + + if (MLX5_GET(tls_extended_cap, buf, tx)) + caps |= MLX5_ACCEL_TLS_TX; + if (MLX5_GET(tls_extended_cap, buf, rx)) + caps |= MLX5_ACCEL_TLS_RX; + if (MLX5_GET(tls_extended_cap, buf, tls_v12)) + caps |= MLX5_ACCEL_TLS_V12; + if (MLX5_GET(tls_extended_cap, buf, tls_v13)) + caps |= MLX5_ACCEL_TLS_V13; + if (MLX5_GET(tls_extended_cap, buf, lro)) + caps |= MLX5_ACCEL_TLS_LRO; + if (MLX5_GET(tls_extended_cap, buf, ipv6)) + caps |= MLX5_ACCEL_TLS_IPV6; + + if (MLX5_GET(tls_extended_cap, buf, aes_gcm_128)) + caps |= MLX5_ACCEL_TLS_AES_GCM128; + if (MLX5_GET(tls_extended_cap, buf, aes_gcm_256)) + caps |= MLX5_ACCEL_TLS_AES_GCM256; + + *p_caps = caps; + err = 0; +out: + kfree(buf); + return err; +} + +int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + struct mlx5_fpga_conn_attr init_attr = {0}; + struct mlx5_fpga_conn *conn; + struct mlx5_fpga_tls *tls; + int err = 0; + + if (!mlx5_fpga_is_tls_device(mdev) || !fdev) + return 0; + + tls = kzalloc(sizeof(*tls), GFP_KERNEL); + if (!tls) + return -ENOMEM; + + err = mlx5_fpga_tls_get_caps(fdev, &tls->caps); + if (err) + goto error; + + if (!(tls->caps & (MLX5_ACCEL_TLS_V12 | MLX5_ACCEL_TLS_AES_GCM128))) { + err = -ENOTSUPP; + goto error; + } + + init_attr.rx_size = SBU_QP_QUEUE_SIZE; + init_attr.tx_size = SBU_QP_QUEUE_SIZE; + init_attr.recv_cb = mlx5_fpga_tls_hw_qp_recv_cb; + init_attr.cb_arg = fdev; + conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr); + if (IS_ERR(conn)) { + err = PTR_ERR(conn); + mlx5_fpga_err(fdev, "Error creating TLS command connection %d\n", + err); + goto error; + } + + tls->conn = conn; + spin_lock_init(&tls->pending_cmds_lock); + INIT_LIST_HEAD(&tls->pending_cmds); + + idr_init(&tls->tx_idr); + idr_init(&tls->rx_idr); + spin_lock_init(&tls->tx_idr_spinlock); + spin_lock_init(&tls->rx_idr_spinlock); + fdev->tls = tls; + return 0; + +error: + kfree(tls); + return err; +} + +void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + + if (!fdev || !fdev->tls) + return; + + mlx5_fpga_sbu_conn_destroy(fdev->tls->conn); + kfree(fdev->tls); + fdev->tls = NULL; +} + +static void mlx5_fpga_tls_set_aes_gcm128_ctx(void *cmd, + struct tls_crypto_info *info, + __be64 *rcd_sn) +{ + struct tls12_crypto_info_aes_gcm_128 *crypto_info = + (struct tls12_crypto_info_aes_gcm_128 *)info; + + memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_rcd_sn), crypto_info->rec_seq, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + + memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_implicit_iv), + crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key), + crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); + + /* in AES-GCM 128 we need to write the key twice */ + memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key) + + TLS_CIPHER_AES_GCM_128_KEY_SIZE, + crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); + + MLX5_SET(tls_cmd, cmd, alg, MLX5_TLS_ALG_AES_GCM_128); +} + +static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps, + struct tls_crypto_info *crypto_info) +{ + __be64 rcd_sn; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (!(caps & MLX5_ACCEL_TLS_AES_GCM128)) + return -EINVAL; + mlx5_fpga_tls_set_aes_gcm128_ctx(cmd, crypto_info, &rcd_sn); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int _mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, + struct tls_crypto_info *crypto_info, + u32 swid, u32 tcp_sn) +{ + u32 caps = mlx5_fpga_tls_device_caps(mdev); + struct mlx5_setup_stream_context *ctx; + int ret = -ENOMEM; + size_t cmd_size; + void *cmd; + + cmd_size = MLX5_TLS_COMMAND_SIZE + sizeof(*ctx); + ctx = kzalloc(cmd_size, GFP_KERNEL); + if (!ctx) + goto out; + + cmd = ctx + 1; + ret = mlx5_fpga_tls_set_key_material(cmd, caps, crypto_info); + if (ret) + goto free_ctx; + + mlx5_fpga_tls_flow_to_cmd(flow, cmd); + + MLX5_SET(tls_cmd, cmd, swid, swid); + MLX5_SET(tls_cmd, cmd, tcp_sn, tcp_sn); + + return mlx5_fpga_tls_setup_stream_cmd(mdev, ctx); + +free_ctx: + kfree(ctx); +out: + return ret; +} + +int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn, u32 *p_swid, + bool direction_sx) +{ + struct mlx5_fpga_tls *tls = mdev->fpga->tls; + int ret = -ENOMEM; + u32 swid; + + if (direction_sx) + ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr, + &tls->tx_idr_spinlock, flow); + else + ret = mlx5_fpga_tls_alloc_swid(&tls->rx_idr, + &tls->rx_idr_spinlock, flow); + + if (ret < 0) + return ret; + + swid = ret; + MLX5_SET(tls_flow, flow, direction_sx, direction_sx ? 1 : 0); + + ret = _mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid, + start_offload_tcp_sn); + if (ret && ret != -EINTR) + goto free_swid; + + *p_swid = swid; + return 0; +free_swid: + if (direction_sx) + mlx5_fpga_tls_release_swid(&tls->tx_idr, + &tls->tx_idr_spinlock, swid); + else + mlx5_fpga_tls_release_swid(&tls->rx_idr, + &tls->rx_idr_spinlock, swid); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h new file mode 100644 index 000000000..3b2e37bf7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5_FPGA_TLS_H__ +#define __MLX5_FPGA_TLS_H__ + +#include <linux/mlx5/driver.h> + +#include <net/tls.h> +#include "fpga/core.h" + +struct mlx5_fpga_tls { + struct list_head pending_cmds; + spinlock_t pending_cmds_lock; /* Protects pending_cmds */ + u32 caps; + struct mlx5_fpga_conn *conn; + + struct idr tx_idr; + struct idr rx_idr; + spinlock_t tx_idr_spinlock; /* protects the IDR */ + spinlock_t rx_idr_spinlock; /* protects the IDR */ +}; + +int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn, u32 *p_swid, + bool direction_sx); + +void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, + gfp_t flags, bool direction_sx); + +bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev); +int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev); +void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev); + +static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev) +{ + return mdev->fpga->tls->caps; +} + +int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, + u64 rcd_sn); + +#endif /* __MLX5_FPGA_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c new file mode 100644 index 000000000..8e01f8180 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -0,0 +1,768 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/mlx5_ifc.h> + +#include "fs_core.h" +#include "fs_cmd.h" +#include "mlx5_core.h" +#include "eswitch.h" + +static int mlx5_cmd_stub_update_root_ft(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect) +{ + return 0; +} + +static int mlx5_cmd_stub_create_flow_table(struct mlx5_core_dev *dev, + u16 vport, + enum fs_flow_table_op_mod op_mod, + enum fs_flow_table_type type, + unsigned int level, + unsigned int log_size, + struct mlx5_flow_table *next_ft, + unsigned int *table_id, u32 flags) +{ + return 0; +} + +static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft) +{ + return 0; +} + +static int mlx5_cmd_stub_modify_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + return 0; +} + +static int mlx5_cmd_stub_create_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id) +{ + return 0; +} + +static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id) +{ + return 0; +} + +static int mlx5_cmd_stub_create_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + return 0; +} + +static int mlx5_cmd_stub_update_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_stub_delete_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + return 0; +} + +static int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, u32 underlay_qpn, + bool disconnect) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; + + if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + underlay_qpn == 0) + return 0; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); + + if (disconnect) { + MLX5_SET(set_flow_table_root_in, in, op_mod, 1); + MLX5_SET(set_flow_table_root_in, in, table_id, 0); + } else { + MLX5_SET(set_flow_table_root_in, in, op_mod, 0); + MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + } + + MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn); + if (ft->vport) { + MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport); + MLX5_SET(set_flow_table_root_in, in, other_vport, 1); + } + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, + u16 vport, + enum fs_flow_table_op_mod op_mod, + enum fs_flow_table_type type, + unsigned int level, + unsigned int log_size, + struct mlx5_flow_table *next_ft, + unsigned int *table_id, u32 flags) +{ + int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN); + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; + int err; + + MLX5_SET(create_flow_table_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_TABLE); + + MLX5_SET(create_flow_table_in, in, table_type, type); + MLX5_SET(create_flow_table_in, in, flow_table_context.level, level); + MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, log_size); + if (vport) { + MLX5_SET(create_flow_table_in, in, vport_number, vport); + MLX5_SET(create_flow_table_in, in, other_vport, 1); + } + + MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, + en_encap_decap); + MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en, + en_encap_decap); + + switch (op_mod) { + case FS_FT_OP_MOD_NORMAL: + if (next_ft) { + MLX5_SET(create_flow_table_in, in, + flow_table_context.table_miss_action, 1); + MLX5_SET(create_flow_table_in, in, + flow_table_context.table_miss_id, next_ft->id); + } + break; + + case FS_FT_OP_MOD_LAG_DEMUX: + MLX5_SET(create_flow_table_in, in, op_mod, 0x1); + if (next_ft) + MLX5_SET(create_flow_table_in, in, + flow_table_context.lag_master_next_table_id, + next_ft->id); + break; + } + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *table_id = MLX5_GET(create_flow_table_out, out, + table_id); + return err; +} + +static int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0}; + + MLX5_SET(destroy_flow_table_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + MLX5_SET(destroy_flow_table_in, in, table_type, ft->type); + MLX5_SET(destroy_flow_table_in, in, table_id, ft->id); + if (ft->vport) { + MLX5_SET(destroy_flow_table_in, in, vport_number, ft->vport); + MLX5_SET(destroy_flow_table_in, in, other_vport, 1); + } + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0}; + + MLX5_SET(modify_flow_table_in, in, opcode, + MLX5_CMD_OP_MODIFY_FLOW_TABLE); + MLX5_SET(modify_flow_table_in, in, table_type, ft->type); + MLX5_SET(modify_flow_table_in, in, table_id, ft->id); + + if (ft->op_mod == FS_FT_OP_MOD_LAG_DEMUX) { + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, + flow_table_context.lag_master_next_table_id, next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, + flow_table_context.lag_master_next_table_id, 0); + } + } else { + if (ft->vport) { + MLX5_SET(modify_flow_table_in, in, vport_number, + ft->vport); + MLX5_SET(modify_flow_table_in, in, other_vport, 1); + } + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, + flow_table_context.table_miss_action, 1); + MLX5_SET(modify_flow_table_in, in, + flow_table_context.table_miss_id, + next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, + flow_table_context.table_miss_action, 0); + } + } + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; + + MLX5_SET(create_flow_group_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET(create_flow_group_in, in, table_type, ft->type); + MLX5_SET(create_flow_group_in, in, table_id, ft->id); + if (ft->vport) { + MLX5_SET(create_flow_group_in, in, vport_number, ft->vport); + MLX5_SET(create_flow_group_in, in, other_vport, 1); + } + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *group_id = MLX5_GET(create_flow_group_out, out, + group_id); + return err; +} + +static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id) +{ + u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {0}; + + MLX5_SET(destroy_flow_group_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET(destroy_flow_group_in, in, table_type, ft->type); + MLX5_SET(destroy_flow_group_in, in, table_id, ft->id); + MLX5_SET(destroy_flow_group_in, in, group_id, group_id); + if (ft->vport) { + MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport); + MLX5_SET(destroy_flow_group_in, in, other_vport, 1); + } + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte) +{ + unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + + fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct); + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0}; + struct mlx5_flow_rule *dst; + void *in_flow_context, *vlan; + void *in_match_value; + void *in_dests; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, op_mod, opmod); + MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask); + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, fte->index); + if (ft->vport) { + MLX5_SET(set_fte_in, in, vport_number, ft->vport); + MLX5_SET(set_fte_in, in, other_vport, 1); + } + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + + MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); + MLX5_SET(flow_context, in_flow_context, action, fte->action.action); + MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id); + MLX5_SET(flow_context, in_flow_context, modify_header_id, + fte->action.modify_id); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio); + + in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, + match_value); + memcpy(in_match_value, &fte->val, sizeof(fte->val)); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + int list_size = 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + unsigned int id, type = dst->dest_attr.type; + + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + id = dst->dest_attr.ft_num; + type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + id = dst->dest_attr.ft->id; + break; + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + id = dst->dest_attr.vport.num; + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, + dst->dest_attr.vport.vhca_id_valid); + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id, + dst->dest_attr.vport.vhca_id); + break; + default: + id = dst->dest_attr.tir_num; + } + + MLX5_SET(dest_format_struct, in_dests, destination_type, + type); + MLX5_SET(dest_format_struct, in_dests, destination_id, id); + in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + list_size++; + } + + MLX5_SET(flow_context, in_flow_context, destination_list_size, + list_size); + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, + log_max_flow_counter, + ft->type)); + int list_size = 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + if (dst->dest_attr.type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + MLX5_SET(flow_counter_list, in_dests, flow_counter_id, + dst->dest_attr.counter->id); + in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + list_size++; + } + if (list_size > max_list_size) { + err = -EINVAL; + goto err_out; + } + + MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, + list_size); + } + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +err_out: + kvfree(in); + return err; +} + +static int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + unsigned int group_id = group->id; + + return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); +} + +static int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte) +{ + int opmod; + int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive. + flow_modify_en); + if (!atomic_mod_cap) + return -EOPNOTSUPP; + opmod = 1; + + return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte); +} + +static int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {0}; + + MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + MLX5_SET(delete_fte_in, in, table_type, ft->type); + MLX5_SET(delete_fte_in, in, table_id, ft->id); + MLX5_SET(delete_fte_in, in, flow_index, fte->index); + if (ft->vport) { + MLX5_SET(delete_fte_in, in, vport_number, ft->vport); + MLX5_SET(delete_fte_in, in, other_vport, 1); + } + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) +{ + u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0}; + int err; + + MLX5_SET(alloc_flow_counter_in, in, opcode, + MLX5_CMD_OP_ALLOC_FLOW_COUNTER); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); + return err; +} + +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0}; + + MLX5_SET(dealloc_flow_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, + u64 *packets, u64 *bytes) +{ + u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0}; + void *stats; + int err = 0; + + MLX5_SET(query_flow_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_COUNTER); + MLX5_SET(query_flow_counter_in, in, op_mod, 0); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, id); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics); + *packets = MLX5_GET64(traffic_counter, stats, packets); + *bytes = MLX5_GET64(traffic_counter, stats, octets); + return 0; +} + +struct mlx5_cmd_fc_bulk { + u32 id; + int num; + int outlen; + u32 out[0]; +}; + +struct mlx5_cmd_fc_bulk * +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num) +{ + struct mlx5_cmd_fc_bulk *b; + int outlen = + MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter) * num; + + b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL); + if (!b) + return NULL; + + b->id = id; + b->num = num; + b->outlen = outlen; + + return b; +} + +void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b) +{ + kfree(b); +} + +int +mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) +{ + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0}; + + MLX5_SET(query_flow_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_COUNTER); + MLX5_SET(query_flow_counter_in, in, op_mod, 0); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id); + MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num); + return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen); +} + +void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, + struct mlx5_cmd_fc_bulk *b, u32 id, + u64 *packets, u64 *bytes) +{ + int index = id - b->id; + void *stats; + + if (index < 0 || index >= b->num) { + mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n", + id, b->id, b->id + b->num - 1); + return; + } + + stats = MLX5_ADDR_OF(query_flow_counter_out, b->out, + flow_statistics[index]); + *packets = MLX5_GET64(traffic_counter, stats, packets); + *bytes = MLX5_GET64(traffic_counter, stats, octets); +} + +int mlx5_encap_alloc(struct mlx5_core_dev *dev, + int header_type, + size_t size, + void *encap_header, + u32 *encap_id) +{ + int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); + u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)]; + void *encap_header_in; + void *header; + int inlen; + int err; + u32 *in; + + if (size > max_encap_size) { + mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n", + size, max_encap_size); + return -EINVAL; + } + + in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size, + GFP_KERNEL); + if (!in) + return -ENOMEM; + + encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header); + header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header); + inlen = header - (void *)in + size; + + memset(in, 0, inlen); + MLX5_SET(alloc_encap_header_in, in, opcode, + MLX5_CMD_OP_ALLOC_ENCAP_HEADER); + MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size); + MLX5_SET(encap_header_in, encap_header_in, header_type, header_type); + memcpy(header, encap_header, size); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + + *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id); + kfree(in); + return err; +} + +void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(dealloc_encap_header_in, in, opcode, + MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); + MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id); + + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 namespace, u8 num_actions, + void *modify_actions, u32 *modify_header_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)]; + int max_actions, actions_size, inlen, err; + void *actions_in; + u8 table_type; + u32 *in; + + switch (namespace) { + case MLX5_FLOW_NAMESPACE_FDB: + max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions); + table_type = FS_FT_FDB; + break; + case MLX5_FLOW_NAMESPACE_KERNEL: + max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions); + table_type = FS_FT_NIC_RX; + break; + default: + return -EOPNOTSUPP; + } + + if (num_actions > max_actions) { + mlx5_core_warn(dev, "too many modify header actions %d, max supported %d\n", + num_actions, max_actions); + return -EOPNOTSUPP; + } + + actions_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) * num_actions; + inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + actions_size; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type); + MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_actions); + + actions_in = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions); + memcpy(actions_in, modify_actions, actions_size); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + + *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); + kfree(in); + return err; +} + +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(dealloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, + modify_header_id); + + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static const struct mlx5_flow_cmds mlx5_flow_cmds = { + .create_flow_table = mlx5_cmd_create_flow_table, + .destroy_flow_table = mlx5_cmd_destroy_flow_table, + .modify_flow_table = mlx5_cmd_modify_flow_table, + .create_flow_group = mlx5_cmd_create_flow_group, + .destroy_flow_group = mlx5_cmd_destroy_flow_group, + .create_fte = mlx5_cmd_create_fte, + .update_fte = mlx5_cmd_update_fte, + .delete_fte = mlx5_cmd_delete_fte, + .update_root_ft = mlx5_cmd_update_root_ft, +}; + +static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { + .create_flow_table = mlx5_cmd_stub_create_flow_table, + .destroy_flow_table = mlx5_cmd_stub_destroy_flow_table, + .modify_flow_table = mlx5_cmd_stub_modify_flow_table, + .create_flow_group = mlx5_cmd_stub_create_flow_group, + .destroy_flow_group = mlx5_cmd_stub_destroy_flow_group, + .create_fte = mlx5_cmd_stub_create_fte, + .update_fte = mlx5_cmd_stub_update_fte, + .delete_fte = mlx5_cmd_stub_delete_fte, + .update_root_ft = mlx5_cmd_stub_update_root_ft, +}; + +static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) +{ + return &mlx5_flow_cmds; +} + +static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_stub_cmds(void) +{ + return &mlx5_flow_cmd_stubs; +} + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type) +{ + switch (type) { + case FS_FT_NIC_RX: + case FS_FT_ESW_EGRESS_ACL: + case FS_FT_ESW_INGRESS_ACL: + case FS_FT_FDB: + case FS_FT_SNIFFER_RX: + case FS_FT_SNIFFER_TX: + return mlx5_fs_cmd_get_fw_cmds(); + case FS_FT_NIC_TX: + default: + return mlx5_fs_cmd_get_stub_cmds(); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h new file mode 100644 index 000000000..6228ba7bf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX5_FS_CMD_ +#define _MLX5_FS_CMD_ + +#include "fs_core.h" + +struct mlx5_flow_cmds { + int (*create_flow_table)(struct mlx5_core_dev *dev, + u16 vport, + enum fs_flow_table_op_mod op_mod, + enum fs_flow_table_type type, + unsigned int level, unsigned int log_size, + struct mlx5_flow_table *next_ft, + unsigned int *table_id, u32 flags); + int (*destroy_flow_table)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft); + + int (*modify_flow_table)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft); + + int (*create_flow_group)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id); + + int (*destroy_flow_group)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id); + + int (*create_fte)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + struct fs_fte *fte); + + int (*update_fte)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte); + + int (*delete_fte)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte); + + int (*update_root_ft)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect); +}; + +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, + u64 *packets, u64 *bytes); + +struct mlx5_cmd_fc_bulk; + +struct mlx5_cmd_fc_bulk * +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num); +void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b); +int +mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b); +void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, + struct mlx5_cmd_fc_bulk *b, u32 id, + u64 *packets, u64 *bytes); + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c new file mode 100644 index 000000000..f0aa7f0e5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -0,0 +1,2720 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mutex.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/eswitch.h> + +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" +#include "diag/fs_tracepoint.h" +#include "accel/ipsec.h" +#include "fpga/ipsec.h" + +#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ + sizeof(struct init_tree_node)) + +#define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\ + ...) {.type = FS_TYPE_PRIO,\ + .min_ft_level = min_level_val,\ + .num_levels = num_levels_val,\ + .num_leaf_prios = num_prios_val,\ + .caps = caps_val,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\ + ADD_PRIO(num_prios_val, 0, num_levels_val, {},\ + __VA_ARGS__)\ + +#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\ + sizeof(long)) + +#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap)) + +#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ + .caps = (long[]) {__VA_ARGS__} } + +#define FS_CHAINING_CAPS FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_receive.modify_root), \ + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)) + +#define LEFTOVERS_NUM_LEVELS 1 +#define LEFTOVERS_NUM_PRIOS 1 + +#define BY_PASS_PRIO_NUM_LEVELS 1 +#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ + LEFTOVERS_NUM_PRIOS) + +#define ETHTOOL_PRIO_NUM_LEVELS 1 +#define ETHTOOL_NUM_PRIOS 11 +#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) +/* Vlan, mac, ttc, inner ttc, aRFS */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 5 +#define KERNEL_NIC_NUM_PRIOS 1 +/* One more level for tc */ +#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) + +#define KERNEL_NIC_TC_NUM_PRIOS 1 +#define KERNEL_NIC_TC_NUM_LEVELS 2 + +#define ANCHOR_NUM_LEVELS 1 +#define ANCHOR_NUM_PRIOS 1 +#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) + +#define OFFLOADS_MAX_FT 1 +#define OFFLOADS_NUM_PRIOS 1 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) + +#define LAG_PRIO_NUM_LEVELS 1 +#define LAG_NUM_PRIOS 1 +#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1) + +struct node_caps { + size_t arr_sz; + long *caps; +}; + +static struct init_tree_node { + enum fs_node_type type; + struct init_tree_node *children; + int ar_size; + struct node_caps caps; + int min_ft_level; + int num_leaf_prios; + int prio; + int num_levels; +} root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 7, + .children = (struct init_tree_node[]) { + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, LAG_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS, + LAG_PRIO_NUM_LEVELS))), + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), + ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS, + ETHTOOL_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS), + ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, + KERNEL_NIC_PRIO_NUM_LEVELS))), + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))), + ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))), + } +}; + +enum fs_i_lock_class { + FS_LOCK_GRANDPARENT, + FS_LOCK_PARENT, + FS_LOCK_CHILD +}; + +static const struct rhashtable_params rhash_fte = { + .key_len = FIELD_SIZEOF(struct fs_fte, val), + .key_offset = offsetof(struct fs_fte, val), + .head_offset = offsetof(struct fs_fte, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +static const struct rhashtable_params rhash_fg = { + .key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask), + .key_offset = offsetof(struct mlx5_flow_group, mask), + .head_offset = offsetof(struct mlx5_flow_group, hash), + .automatic_shrinking = true, + .min_size = 1, + +}; + +static void del_hw_flow_table(struct fs_node *node); +static void del_hw_flow_group(struct fs_node *node); +static void del_hw_fte(struct fs_node *node); +static void del_sw_flow_table(struct fs_node *node); +static void del_sw_flow_group(struct fs_node *node); +static void del_sw_fte(struct fs_node *node); +static void del_sw_prio(struct fs_node *node); +static void del_sw_ns(struct fs_node *node); +/* Delete rule (destination) is special case that + * requires to lock the FTE for all the deletion process. + */ +static void del_sw_hw_rule(struct fs_node *node); +static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, + struct mlx5_flow_destination *d2); +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns); +static struct mlx5_flow_rule * +find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest); + +static void tree_init_node(struct fs_node *node, + void (*del_hw_func)(struct fs_node *), + void (*del_sw_func)(struct fs_node *)) +{ + refcount_set(&node->refcount, 1); + INIT_LIST_HEAD(&node->list); + INIT_LIST_HEAD(&node->children); + init_rwsem(&node->lock); + node->del_hw_func = del_hw_func; + node->del_sw_func = del_sw_func; + node->active = false; +} + +static void tree_add_node(struct fs_node *node, struct fs_node *parent) +{ + if (parent) + refcount_inc(&parent->refcount); + node->parent = parent; + + /* Parent is the root */ + if (!parent) + node->root = node; + else + node->root = parent->root; +} + +static int tree_get_node(struct fs_node *node) +{ + return refcount_inc_not_zero(&node->refcount); +} + +static void nested_down_read_ref_node(struct fs_node *node, + enum fs_i_lock_class class) +{ + if (node) { + down_read_nested(&node->lock, class); + refcount_inc(&node->refcount); + } +} + +static void nested_down_write_ref_node(struct fs_node *node, + enum fs_i_lock_class class) +{ + if (node) { + down_write_nested(&node->lock, class); + refcount_inc(&node->refcount); + } +} + +static void down_write_ref_node(struct fs_node *node) +{ + if (node) { + down_write(&node->lock); + refcount_inc(&node->refcount); + } +} + +static void up_read_ref_node(struct fs_node *node) +{ + refcount_dec(&node->refcount); + up_read(&node->lock); +} + +static void up_write_ref_node(struct fs_node *node) +{ + refcount_dec(&node->refcount); + up_write(&node->lock); +} + +static void tree_put_node(struct fs_node *node) +{ + struct fs_node *parent_node = node->parent; + + if (refcount_dec_and_test(&node->refcount)) { + if (node->del_hw_func) + node->del_hw_func(node); + if (parent_node) { + /* Only root namespace doesn't have parent and we just + * need to free its node. + */ + down_write_ref_node(parent_node); + list_del_init(&node->list); + if (node->del_sw_func) + node->del_sw_func(node); + up_write_ref_node(parent_node); + } else { + kfree(node); + } + node = NULL; + } + if (!node && parent_node) + tree_put_node(parent_node); +} + +static int tree_remove_node(struct fs_node *node) +{ + if (refcount_read(&node->refcount) > 1) { + refcount_dec(&node->refcount); + return -EEXIST; + } + tree_put_node(node); + return 0; +} + +static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, + unsigned int prio) +{ + struct fs_prio *iter_prio; + + fs_for_each_prio(iter_prio, ns) { + if (iter_prio->prio == prio) + return iter_prio; + } + + return NULL; +} + +static bool check_valid_spec(const struct mlx5_flow_spec *spec) +{ + int i; + + for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) + if (spec->match_value[i] & ~spec->match_criteria[i]) { + pr_warn("mlx5_core: match_value differs from match_criteria\n"); + return false; + } + + return true; +} + +static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) +{ + struct fs_node *root; + struct mlx5_flow_namespace *ns; + + root = node->root; + + if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) { + pr_warn("mlx5: flow steering node is not in tree or garbaged\n"); + return NULL; + } + + ns = container_of(root, struct mlx5_flow_namespace, node); + return container_of(ns, struct mlx5_flow_root_namespace, ns); +} + +static inline struct mlx5_flow_steering *get_steering(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root = find_root(node); + + if (root) + return root->dev->priv.steering; + return NULL; +} + +static inline struct mlx5_core_dev *get_dev(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root = find_root(node); + + if (root) + return root->dev; + return NULL; +} + +static void del_sw_ns(struct fs_node *node) +{ + kfree(node); +} + +static void del_sw_prio(struct fs_node *node) +{ + kfree(node); +} + +static void del_hw_flow_table(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_table *ft; + struct mlx5_core_dev *dev; + int err; + + fs_get_obj(ft, node); + dev = get_dev(&ft->node); + root = find_root(&ft->node); + + if (node->active) { + err = root->cmds->destroy_flow_table(dev, ft); + if (err) + mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + } +} + +static void del_sw_flow_table(struct fs_node *node) +{ + struct mlx5_flow_table *ft; + struct fs_prio *prio; + + fs_get_obj(ft, node); + + rhltable_destroy(&ft->fgs_hash); + fs_get_obj(prio, ft->node.parent); + prio->num_ft--; + kfree(ft); +} + +static void del_sw_hw_rule(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_rule *rule; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int modify_mask; + struct mlx5_core_dev *dev = get_dev(node); + int err; + bool update_fte = false; + + fs_get_obj(rule, node); + fs_get_obj(fte, rule->node.parent); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + trace_mlx5_fs_del_rule(rule); + if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + mutex_lock(&rule->dest_attr.ft->lock); + list_del(&rule->next_ft); + mutex_unlock(&rule->dest_attr.ft->lock); + } + + if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER && + --fte->dests_size) { + modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); + fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; + update_fte = true; + goto out; + } + + if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && + --fte->dests_size) { + modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + update_fte = true; + } +out: + root = find_root(&ft->node); + if (update_fte && fte->dests_size) { + err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte); + if (err) + mlx5_core_warn(dev, + "%s can't del rule fg id=%d fte_index=%d\n", + __func__, fg->id, fte->index); + } + kfree(rule); +} + +static void del_hw_fte(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_core_dev *dev; + struct fs_fte *fte; + int err; + + fs_get_obj(fte, node); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + + trace_mlx5_fs_del_fte(fte); + dev = get_dev(&ft->node); + root = find_root(&ft->node); + if (node->active) { + err = root->cmds->delete_fte(dev, ft, fte); + if (err) + mlx5_core_warn(dev, + "flow steering can't delete fte in index %d of flow group id %d\n", + fte->index, fg->id); + node->active = 0; + } +} + +static void del_sw_fte(struct fs_node *node) +{ + struct mlx5_flow_steering *steering = get_steering(node); + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int err; + + fs_get_obj(fte, node); + fs_get_obj(fg, fte->node.parent); + + err = rhashtable_remove_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + WARN_ON(err); + ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); + kmem_cache_free(steering->ftes_cache, fte); +} + +static void del_hw_flow_group(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_group *fg; + struct mlx5_flow_table *ft; + struct mlx5_core_dev *dev; + + fs_get_obj(fg, node); + fs_get_obj(ft, fg->node.parent); + dev = get_dev(&ft->node); + trace_mlx5_fs_del_fg(fg); + + root = find_root(&ft->node); + if (fg->node.active && root->cmds->destroy_flow_group(dev, ft, fg->id)) + mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", + fg->id, ft->id); +} + +static void del_sw_flow_group(struct fs_node *node) +{ + struct mlx5_flow_steering *steering = get_steering(node); + struct mlx5_flow_group *fg; + struct mlx5_flow_table *ft; + int err; + + fs_get_obj(fg, node); + fs_get_obj(ft, fg->node.parent); + + rhashtable_destroy(&fg->ftes_hash); + ida_destroy(&fg->fte_allocator); + if (ft->autogroup.active && fg->max_ftes == ft->autogroup.group_size) + ft->autogroup.num_groups--; + err = rhltable_remove(&ft->fgs_hash, + &fg->hash, + rhash_fg); + WARN_ON(err); + kmem_cache_free(steering->fgs_cache, fg); +} + +static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) +{ + int index; + int ret; + + index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL); + if (index < 0) + return index; + + fte->index = index + fg->start_index; + ret = rhashtable_insert_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + if (ret) + goto err_ida_remove; + + tree_add_node(&fte->node, &fg->node); + list_add_tail(&fte->node.list, &fg->node.children); + return 0; + +err_ida_remove: + ida_simple_remove(&fg->fte_allocator, index); + return ret; +} + +static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, + u32 *match_value, + struct mlx5_flow_act *flow_act) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct fs_fte *fte; + + fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL); + if (!fte) + return ERR_PTR(-ENOMEM); + + memcpy(fte->val, match_value, sizeof(fte->val)); + fte->node.type = FS_TYPE_FLOW_ENTRY; + fte->action = *flow_act; + + tree_init_node(&fte->node, del_hw_fte, del_sw_fte); + + return fte; +} + +static void dealloc_flow_group(struct mlx5_flow_steering *steering, + struct mlx5_flow_group *fg) +{ + rhashtable_destroy(&fg->ftes_hash); + kmem_cache_free(steering->fgs_cache, fg); +} + +static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering, + u8 match_criteria_enable, + void *match_criteria, + int start_index, + int end_index) +{ + struct mlx5_flow_group *fg; + int ret; + + fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL); + if (!fg) + return ERR_PTR(-ENOMEM); + + ret = rhashtable_init(&fg->ftes_hash, &rhash_fte); + if (ret) { + kmem_cache_free(steering->fgs_cache, fg); + return ERR_PTR(ret); +} + ida_init(&fg->fte_allocator); + fg->mask.match_criteria_enable = match_criteria_enable; + memcpy(&fg->mask.match_criteria, match_criteria, + sizeof(fg->mask.match_criteria)); + fg->node.type = FS_TYPE_FLOW_GROUP; + fg->start_index = start_index; + fg->max_ftes = end_index - start_index + 1; + + return fg; +} + +static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + void *match_criteria, + int start_index, + int end_index, + struct list_head *prev) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_group *fg; + int ret; + + fg = alloc_flow_group(steering, match_criteria_enable, match_criteria, + start_index, end_index); + if (IS_ERR(fg)) + return fg; + + /* initialize refcnt, add to parent list */ + ret = rhltable_insert(&ft->fgs_hash, + &fg->hash, + rhash_fg); + if (ret) { + dealloc_flow_group(steering, fg); + return ERR_PTR(ret); + } + + tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group); + tree_add_node(&fg->node, &ft->node); + /* Add node to group list */ + list_add(&fg->node.list, prev); + atomic_inc(&ft->node.version); + + return fg; +} + +static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte, + enum fs_flow_table_type table_type, + enum fs_flow_table_op_mod op_mod, + u32 flags) +{ + struct mlx5_flow_table *ft; + int ret; + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + if (!ft) + return ERR_PTR(-ENOMEM); + + ret = rhltable_init(&ft->fgs_hash, &rhash_fg); + if (ret) { + kfree(ft); + return ERR_PTR(ret); + } + + ft->level = level; + ft->node.type = FS_TYPE_FLOW_TABLE; + ft->op_mod = op_mod; + ft->type = table_type; + ft->vport = vport; + ft->max_fte = max_fte; + ft->flags = flags; + INIT_LIST_HEAD(&ft->fwd_rules); + mutex_init(&ft->lock); + + return ft; +} + +/* If reverse is false, then we search for the first flow table in the + * root sub-tree from start(closest from right), else we search for the + * last flow table in the root sub-tree till start(closest from left). + */ +static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, + struct list_head *start, + bool reverse) +{ +#define list_advance_entry(pos, reverse) \ + ((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list)) + +#define list_for_each_advance_continue(pos, head, reverse) \ + for (pos = list_advance_entry(pos, reverse); \ + &pos->list != (head); \ + pos = list_advance_entry(pos, reverse)) + + struct fs_node *iter = list_entry(start, struct fs_node, list); + struct mlx5_flow_table *ft = NULL; + + if (!root) + return NULL; + + list_for_each_advance_continue(iter, &root->children, reverse) { + if (iter->type == FS_TYPE_FLOW_TABLE) { + fs_get_obj(ft, iter); + return ft; + } + ft = find_closest_ft_recursive(iter, &iter->children, reverse); + if (ft) + return ft; + } + + return ft; +} + +/* If reverse if false then return the first flow table in next priority of + * prio in the tree, else return the last flow table in the previous priority + * of prio in the tree. + */ +static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse) +{ + struct mlx5_flow_table *ft = NULL; + struct fs_node *curr_node; + struct fs_node *parent; + + parent = prio->node.parent; + curr_node = &prio->node; + while (!ft && parent) { + ft = find_closest_ft_recursive(parent, &curr_node->list, reverse); + curr_node = parent; + parent = curr_node->parent; + } + return ft; +} + +/* Assuming all the tree is locked by mutex chain lock */ +static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio) +{ + return find_closest_ft(prio, false); +} + +/* Assuming all the tree is locked by mutex chain lock */ +static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio) +{ + return find_closest_ft(prio, true); +} + +static int connect_fts_in_prio(struct mlx5_core_dev *dev, + struct fs_prio *prio, + struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&prio->node); + struct mlx5_flow_table *iter; + int i = 0; + int err; + + fs_for_each_ft(iter, prio) { + i++; + err = root->cmds->modify_flow_table(dev, iter, ft); + if (err) { + mlx5_core_warn(dev, "Failed to modify flow table %d\n", + iter->id); + /* The driver is out of sync with the FW */ + if (i > 1) + WARN_ON(true); + return err; + } + } + return 0; +} + +/* Connect flow tables from previous priority of prio to ft */ +static int connect_prev_fts(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct mlx5_flow_table *prev_ft; + + prev_ft = find_prev_chained_ft(prio); + if (prev_ft) { + struct fs_prio *prev_prio; + + fs_get_obj(prev_prio, prev_ft->node.parent); + return connect_fts_in_prio(dev, prev_prio, ft); + } + return 0; +} + +static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio + *prio) +{ + struct mlx5_flow_root_namespace *root = find_root(&prio->node); + struct mlx5_ft_underlay_qp *uqp; + int min_level = INT_MAX; + int err; + u32 qpn; + + if (root->root_ft) + min_level = root->root_ft->level; + + if (ft->level >= min_level) + return 0; + + if (list_empty(&root->underlay_qpns)) { + /* Don't set any QPN (zero) in case QPN list is empty */ + qpn = 0; + err = root->cmds->update_root_ft(root->dev, ft, qpn, false); + } else { + list_for_each_entry(uqp, &root->underlay_qpns, list) { + qpn = uqp->qpn; + err = root->cmds->update_root_ft(root->dev, ft, + qpn, false); + if (err) + break; + } + } + + if (err) + mlx5_core_warn(root->dev, + "Update root flow table of id(%u) qpn(%d) failed\n", + ft->id, qpn); + else + root->root_ft = ft; + + return err; +} + +static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + int err = 0; + + fs_get_obj(fte, rule->node.parent); + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return -EINVAL; + down_write_ref_node(&fte->node); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + + memcpy(&rule->dest_attr, dest, sizeof(*dest)); + root = find_root(&ft->node); + err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id, + modify_mask, fte); + up_write_ref_node(&fte->node); + + return err; +} + +int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle, + struct mlx5_flow_destination *new_dest, + struct mlx5_flow_destination *old_dest) +{ + int i; + + if (!old_dest) { + if (handle->num_rules != 1) + return -EINVAL; + return _mlx5_modify_rule_destination(handle->rule[0], + new_dest); + } + + for (i = 0; i < handle->num_rules; i++) { + if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr)) + return _mlx5_modify_rule_destination(handle->rule[i], + new_dest); + } + + return -EINVAL; +} + +/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */ +static int connect_fwd_rules(struct mlx5_core_dev *dev, + struct mlx5_flow_table *new_next_ft, + struct mlx5_flow_table *old_next_ft) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_rule *iter; + int err = 0; + + /* new_next_ft and old_next_ft could be NULL only + * when we create/destroy the anchor flow table. + */ + if (!new_next_ft || !old_next_ft) + return 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = new_next_ft; + + mutex_lock(&old_next_ft->lock); + list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules); + mutex_unlock(&old_next_ft->lock); + list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) { + err = _mlx5_modify_rule_destination(iter, &dest); + if (err) + pr_err("mlx5_core: failed to modify rule to point on flow table %d\n", + new_next_ft->id); + } + return 0; +} + +static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct mlx5_flow_table *next_ft, *first_ft; + int err = 0; + + /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ + + first_ft = list_first_entry_or_null(&prio->node.children, + struct mlx5_flow_table, node.list); + if (!first_ft || first_ft->level > ft->level) { + err = connect_prev_fts(dev, ft, prio); + if (err) + return err; + + next_ft = first_ft ? first_ft : find_next_chained_ft(prio); + err = connect_fwd_rules(dev, ft, next_ft); + if (err) + return err; + } + + if (MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive.modify_root)) + err = update_root_ft_create(ft, prio); + return err; +} + +static void list_add_flow_table(struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct list_head *prev = &prio->node.children; + struct mlx5_flow_table *iter; + + fs_for_each_ft(iter, prio) { + if (iter->level > ft->level) + break; + prev = &iter->node.list; + } + list_add(&ft->node.list, prev); +} + +static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr, + enum fs_flow_table_op_mod op_mod, + u16 vport) +{ + struct mlx5_flow_root_namespace *root = find_root(&ns->node); + struct mlx5_flow_table *next_ft = NULL; + struct fs_prio *fs_prio = NULL; + struct mlx5_flow_table *ft; + int log_table_sz; + int err; + + if (!root) { + pr_err("mlx5: flow steering failed to find root of namespace\n"); + return ERR_PTR(-ENODEV); + } + + mutex_lock(&root->chain_lock); + fs_prio = find_prio(ns, ft_attr->prio); + if (!fs_prio) { + err = -EINVAL; + goto unlock_root; + } + if (ft_attr->level >= fs_prio->num_levels) { + err = -ENOSPC; + goto unlock_root; + } + /* The level is related to the + * priority level range. + */ + ft_attr->level += fs_prio->start_level; + ft = alloc_flow_table(ft_attr->level, + vport, + ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0, + root->table_type, + op_mod, ft_attr->flags); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto unlock_root; + } + + tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); + log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; + next_ft = find_next_chained_ft(fs_prio); + err = root->cmds->create_flow_table(root->dev, ft->vport, ft->op_mod, + ft->type, ft->level, log_table_sz, + next_ft, &ft->id, ft->flags); + if (err) + goto free_ft; + + err = connect_flow_table(root->dev, ft, fs_prio); + if (err) + goto destroy_ft; + ft->node.active = true; + down_write_ref_node(&fs_prio->node); + tree_add_node(&ft->node, &fs_prio->node); + list_add_flow_table(ft, fs_prio); + fs_prio->num_ft++; + up_write_ref_node(&fs_prio->node); + mutex_unlock(&root->chain_lock); + return ft; +destroy_ft: + root->cmds->destroy_flow_table(root->dev, ft); +free_ft: + rhltable_destroy(&ft->fgs_hash); + kfree(ft); +unlock_root: + mutex_unlock(&root->chain_lock); + return ERR_PTR(err); +} + +struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr) +{ + return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0); +} + +struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, + int prio, int max_fte, + u32 level, u16 vport) +{ + struct mlx5_flow_table_attr ft_attr = {}; + + ft_attr.max_fte = max_fte; + ft_attr.level = level; + ft_attr.prio = prio; + + return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport); +} + +struct mlx5_flow_table* +mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns, + int prio, u32 level) +{ + struct mlx5_flow_table_attr ft_attr = {}; + + ft_attr.level = level; + ft_attr.prio = prio; + return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0); +} +EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); + +struct mlx5_flow_table* +mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries, + int max_num_groups, + u32 level, + u32 flags) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *ft; + + if (max_num_groups > num_flow_table_entries) + return ERR_PTR(-EINVAL); + + ft_attr.max_fte = num_flow_table_entries; + ft_attr.prio = prio; + ft_attr.level = level; + ft_attr.flags = flags; + + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) + return ft; + + ft->autogroup.active = true; + ft->autogroup.required_groups = max_num_groups; + /* We save place for flow groups in addition to max types */ + ft->autogroup.group_size = ft->max_fte / (max_num_groups + 1); + + return ft; +} +EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table); + +struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, + u32 *fg_in) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + fg_in, match_criteria); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + fg_in, + match_criteria_enable); + int start_index = MLX5_GET(create_flow_group_in, fg_in, + start_flow_index); + int end_index = MLX5_GET(create_flow_group_in, fg_in, + end_flow_index); + struct mlx5_core_dev *dev = get_dev(&ft->node); + struct mlx5_flow_group *fg; + int err; + + if (ft->autogroup.active) + return ERR_PTR(-EPERM); + + down_write_ref_node(&ft->node); + fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria, + start_index, end_index, + ft->node.children.prev); + up_write_ref_node(&ft->node); + if (IS_ERR(fg)) + return fg; + + err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id); + if (err) { + tree_put_node(&fg->node); + return ERR_PTR(err); + } + trace_mlx5_fs_add_fg(fg); + fg->node.active = true; + + return fg; +} + +static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return NULL; + + INIT_LIST_HEAD(&rule->next_ft); + rule->node.type = FS_TYPE_FLOW_DEST; + if (dest) + memcpy(&rule->dest_attr, dest, sizeof(*dest)); + + return rule; +} + +static struct mlx5_flow_handle *alloc_handle(int num_rules) +{ + struct mlx5_flow_handle *handle; + + handle = kzalloc(struct_size(handle, rule, num_rules), GFP_KERNEL); + if (!handle) + return NULL; + + handle->num_rules = num_rules; + + return handle; +} + +static void destroy_flow_handle(struct fs_fte *fte, + struct mlx5_flow_handle *handle, + struct mlx5_flow_destination *dest, + int i) +{ + for (; --i >= 0;) { + if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) { + fte->dests_size--; + list_del(&handle->rule[i]->node.list); + kfree(handle->rule[i]); + } + } + kfree(handle); +} + +static struct mlx5_flow_handle * +create_flow_handle(struct fs_fte *fte, + struct mlx5_flow_destination *dest, + int dest_num, + int *modify_mask, + bool *new_rule) +{ + struct mlx5_flow_handle *handle; + struct mlx5_flow_rule *rule = NULL; + static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); + static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + int type; + int i = 0; + + handle = alloc_handle((dest_num) ? dest_num : 1); + if (!handle) + return ERR_PTR(-ENOMEM); + + do { + if (dest) { + rule = find_flow_rule(fte, dest + i); + if (rule) { + refcount_inc(&rule->node.refcount); + goto rule_found; + } + } + + *new_rule = true; + rule = alloc_rule(dest + i); + if (!rule) + goto free_rules; + + /* Add dest to dests list- we need flow tables to be in the + * end of the list for forward to next prio rules. + */ + tree_init_node(&rule->node, NULL, del_sw_hw_rule); + if (dest && + dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + list_add(&rule->node.list, &fte->node.children); + else + list_add_tail(&rule->node.list, &fte->node.children); + if (dest) { + fte->dests_size++; + + type = dest[i].type == + MLX5_FLOW_DESTINATION_TYPE_COUNTER; + *modify_mask |= type ? count : dst; + } +rule_found: + handle->rule[i] = rule; + } while (++i < dest_num); + + return handle; + +free_rules: + destroy_flow_handle(fte, handle, dest, i); + return ERR_PTR(-ENOMEM); +} + +/* fte should not be deleted while calling this function */ +static struct mlx5_flow_handle * +add_rule_fte(struct fs_fte *fte, + struct mlx5_flow_group *fg, + struct mlx5_flow_destination *dest, + int dest_num, + bool update_action) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_handle *handle; + struct mlx5_flow_table *ft; + int modify_mask = 0; + int err; + bool new_rule = false; + + handle = create_flow_handle(fte, dest, dest_num, &modify_mask, + &new_rule); + if (IS_ERR(handle) || !new_rule) + goto out; + + if (update_action) + modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); + + fs_get_obj(ft, fg->node.parent); + root = find_root(&fg->node); + if (!(fte->status & FS_FTE_STATUS_EXISTING)) + err = root->cmds->create_fte(get_dev(&ft->node), + ft, fg, fte); + else + err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id, + modify_mask, fte); + if (err) + goto free_handle; + + fte->node.active = true; + fte->status |= FS_FTE_STATUS_EXISTING; + atomic_inc(&fte->node.version); + +out: + return handle; + +free_handle: + destroy_flow_handle(fte, handle, dest, handle->num_rules); + return ERR_PTR(err); +} + +static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec) +{ + struct list_head *prev = &ft->node.children; + struct mlx5_flow_group *fg; + unsigned int candidate_index = 0; + unsigned int group_size = 0; + + if (!ft->autogroup.active) + return ERR_PTR(-ENOENT); + + if (ft->autogroup.num_groups < ft->autogroup.required_groups) + group_size = ft->autogroup.group_size; + + /* ft->max_fte == ft->autogroup.max_types */ + if (group_size == 0) + group_size = 1; + + /* sorted by start_index */ + fs_for_each_fg(fg, ft) { + if (candidate_index + group_size > fg->start_index) + candidate_index = fg->start_index + fg->max_ftes; + else + break; + prev = &fg->node.list; + } + + if (candidate_index + group_size > ft->max_fte) + return ERR_PTR(-ENOSPC); + + fg = alloc_insert_flow_group(ft, + spec->match_criteria_enable, + spec->match_criteria, + candidate_index, + candidate_index + group_size - 1, + prev); + if (IS_ERR(fg)) + goto out; + + if (group_size == ft->autogroup.group_size) + ft->autogroup.num_groups++; + +out: + return fg; +} + +static int create_auto_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_core_dev *dev = get_dev(&ft->node); + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *match_criteria_addr; + u8 src_esw_owner_mask_on; + void *misc; + int err; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + fg->mask.match_criteria_enable); + MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index); + MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index + + fg->max_ftes - 1); + + misc = MLX5_ADDR_OF(fte_match_param, fg->mask.match_criteria, + misc_parameters); + src_esw_owner_mask_on = !!MLX5_GET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, in, + source_eswitch_owner_vhca_id_valid, src_esw_owner_mask_on); + + match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + memcpy(match_criteria_addr, fg->mask.match_criteria, + sizeof(fg->mask.match_criteria)); + + err = root->cmds->create_flow_group(dev, ft, in, &fg->id); + if (!err) { + fg->node.active = true; + trace_mlx5_fs_add_fg(fg); + } + + kvfree(in); + return err; +} + +static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, + struct mlx5_flow_destination *d2) +{ + if (d1->type == d2->type) { + if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + d1->vport.num == d2->vport.num) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + d1->ft == d2->ft) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && + d1->tir_num == d2->tir_num) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM && + d1->ft_num == d2->ft_num)) + return true; + } + + return false; +} + +static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + + list_for_each_entry(rule, &fte->node.children, node.list) { + if (mlx5_flow_dests_cmp(&rule->dest_attr, dest)) + return rule; + } + return NULL; +} + +static bool check_conflicting_actions(u32 action1, u32 action2) +{ + u32 xored_actions = action1 ^ action2; + + /* if one rule only wants to count, it's ok */ + if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT || + action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT) + return false; + + if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_DECAP | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | + MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 | + MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) + return true; + + return false; +} + +static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) +{ + if (check_conflicting_actions(flow_act->action, fte->action.action)) { + mlx5_core_warn(get_dev(&fte->node), + "Found two FTEs with conflicting actions\n"); + return -EEXIST; + } + + if (flow_act->has_flow_tag && + fte->action.flow_tag != flow_act->flow_tag) { + mlx5_core_warn(get_dev(&fte->node), + "FTE flow tag %u already exists with different flow tag %u\n", + fte->action.flow_tag, + flow_act->flow_tag); + return -EEXIST; + } + + return 0; +} + +static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, + u32 *match_value, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num, + struct fs_fte *fte) +{ + struct mlx5_flow_handle *handle; + int old_action; + int i; + int ret; + + ret = check_conflicting_ftes(fte, flow_act); + if (ret) + return ERR_PTR(ret); + + old_action = fte->action.action; + fte->action.action |= flow_act->action; + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != flow_act->action); + if (IS_ERR(handle)) { + fte->action.action = old_action; + return handle; + } + trace_mlx5_fs_set_fte(fte, false); + + for (i = 0; i < handle->num_rules; i++) { + if (refcount_read(&handle->rule[i]->node.refcount) == 1) { + tree_add_node(&handle->rule[i]->node, &fte->node); + trace_mlx5_fs_add_rule(handle->rule[i]); + } + } + return handle; +} + +struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle) +{ + struct mlx5_flow_rule *dst; + struct fs_fte *fte; + + fs_get_obj(fte, handle->rule[0]->node.parent); + + fs_for_each_dst(dst, fte) { + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + return dst->dest_attr.counter; + } + + return NULL; +} + +static bool counter_is_valid(struct mlx5_fc *counter, u32 action) +{ + if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) + return !counter; + + if (!counter) + return false; + + return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)); +} + +static bool dest_is_valid(struct mlx5_flow_destination *dest, + u32 action, + struct mlx5_flow_table *ft) +{ + if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)) + return counter_is_valid(dest->counter, action); + + if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return true; + + if (!dest || ((dest->type == + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) && + (dest->ft->level <= ft->level))) + return false; + return true; +} + +struct match_list { + struct list_head list; + struct mlx5_flow_group *g; +}; + +struct match_list_head { + struct list_head list; + struct match_list first; +}; + +static void free_match_list(struct match_list_head *head) +{ + if (!list_empty(&head->list)) { + struct match_list *iter, *match_tmp; + + list_del(&head->first.list); + tree_put_node(&head->first.g->node); + list_for_each_entry_safe(iter, match_tmp, &head->list, + list) { + tree_put_node(&iter->g->node); + list_del(&iter->list); + kfree(iter); + } + } +} + +static int build_match_list(struct match_list_head *match_head, + struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec) +{ + struct rhlist_head *tmp, *list; + struct mlx5_flow_group *g; + int err = 0; + + rcu_read_lock(); + INIT_LIST_HEAD(&match_head->list); + /* Collect all fgs which has a matching match_criteria */ + list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg); + /* RCU is atomic, we can't execute FW commands here */ + rhl_for_each_entry_rcu(g, tmp, list, hash) { + struct match_list *curr_match; + + if (likely(list_empty(&match_head->list))) { + if (!tree_get_node(&g->node)) + continue; + match_head->first.g = g; + list_add_tail(&match_head->first.list, + &match_head->list); + continue; + } + + curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); + if (!curr_match) { + rcu_read_unlock(); + free_match_list(match_head); + return -ENOMEM; + } + if (!tree_get_node(&g->node)) { + kfree(curr_match); + continue; + } + curr_match->g = g; + list_add_tail(&curr_match->list, &match_head->list); + } + rcu_read_unlock(); + return err; +} + +static u64 matched_fgs_get_version(struct list_head *match_head) +{ + struct match_list *iter; + u64 version = 0; + + list_for_each_entry(iter, match_head, list) + version += (u64)atomic_read(&iter->g->node.version); + return version; +} + +static struct fs_fte * +lookup_fte_locked(struct mlx5_flow_group *g, + u32 *match_value, + bool take_write) +{ + struct fs_fte *fte_tmp; + + if (take_write) + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + else + nested_down_read_ref_node(&g->node, FS_LOCK_PARENT); + fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value, + rhash_fte); + if (!fte_tmp || !tree_get_node(&fte_tmp->node)) { + fte_tmp = NULL; + goto out; + } + if (!fte_tmp->node.active) { + tree_put_node(&fte_tmp->node); + fte_tmp = NULL; + goto out; + } + + nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); +out: + if (take_write) + up_write_ref_node(&g->node); + else + up_read_ref_node(&g->node); + return fte_tmp; +} + +static struct mlx5_flow_handle * +try_add_to_existing_fg(struct mlx5_flow_table *ft, + struct list_head *match_head, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num, + int ft_version) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule; + struct match_list *iter; + bool take_write = false; + struct fs_fte *fte; + u64 version; + int err; + + fte = alloc_fte(ft, spec->match_value, flow_act); + if (IS_ERR(fte)) + return ERR_PTR(-ENOMEM); + +search_again_locked: + version = matched_fgs_get_version(match_head); + /* Try to find a fg that already contains a matching fte */ + list_for_each_entry(iter, match_head, list) { + struct fs_fte *fte_tmp; + + g = iter->g; + fte_tmp = lookup_fte_locked(g, spec->match_value, take_write); + if (!fte_tmp) + continue; + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte_tmp); + up_write_ref_node(&fte_tmp->node); + tree_put_node(&fte_tmp->node); + kmem_cache_free(steering->ftes_cache, fte); + return rule; + } + + /* Check the ft version, for case that new flow group + * was added while the fgs weren't locked + */ + if (atomic_read(&ft->node.version) != ft_version) { + rule = ERR_PTR(-EAGAIN); + goto out; + } + + /* Check the fgs version, for case the new FTE with the + * same values was added while the fgs weren't locked + */ + if (version != matched_fgs_get_version(match_head)) { + take_write = true; + goto search_again_locked; + } + + list_for_each_entry(iter, match_head, list) { + g = iter->g; + + if (!g->node.active) + continue; + + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + + err = insert_fte(g, fte); + if (err) { + up_write_ref_node(&g->node); + if (err == -ENOSPC) + continue; + kmem_cache_free(steering->ftes_cache, fte); + return ERR_PTR(err); + } + + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + up_write_ref_node(&g->node); + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte); + up_write_ref_node(&fte->node); + tree_put_node(&fte->node); + return rule; + } + rule = ERR_PTR(-ENOENT); +out: + kmem_cache_free(steering->ftes_cache, fte); + return rule; +} + +static struct mlx5_flow_handle * +_mlx5_add_flow_rules(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num) + +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule; + struct match_list_head match_head; + bool take_write = false; + struct fs_fte *fte; + int version; + int err; + int i; + + if (!check_valid_spec(spec)) + return ERR_PTR(-EINVAL); + + for (i = 0; i < dest_num; i++) { + if (!dest_is_valid(&dest[i], flow_act->action, ft)) + return ERR_PTR(-EINVAL); + } + nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT); +search_again_locked: + version = atomic_read(&ft->node.version); + + /* Collect all fgs which has a matching match_criteria */ + err = build_match_list(&match_head, ft, spec); + if (err) { + if (take_write) + up_write_ref_node(&ft->node); + else + up_read_ref_node(&ft->node); + return ERR_PTR(err); + } + + if (!take_write) + up_read_ref_node(&ft->node); + + rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest, + dest_num, version); + free_match_list(&match_head); + if (!IS_ERR(rule) || + (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { + if (take_write) + up_write_ref_node(&ft->node); + return rule; + } + + if (!take_write) { + nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT); + take_write = true; + } + + if (PTR_ERR(rule) == -EAGAIN || + version != atomic_read(&ft->node.version)) + goto search_again_locked; + + g = alloc_auto_flow_group(ft, spec); + if (IS_ERR(g)) { + rule = ERR_CAST(g); + up_write_ref_node(&ft->node); + return rule; + } + + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + up_write_ref_node(&ft->node); + + err = create_auto_flow_group(ft, g); + if (err) + goto err_release_fg; + + fte = alloc_fte(ft, spec->match_value, flow_act); + if (IS_ERR(fte)) { + err = PTR_ERR(fte); + goto err_release_fg; + } + + err = insert_fte(g, fte); + if (err) { + kmem_cache_free(steering->ftes_cache, fte); + goto err_release_fg; + } + + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + up_write_ref_node(&g->node); + rule = add_rule_fg(g, spec->match_value, flow_act, dest, + dest_num, fte); + up_write_ref_node(&fte->node); + tree_put_node(&fte->node); + tree_put_node(&g->node); + return rule; + +err_release_fg: + up_write_ref_node(&g->node); + tree_put_node(&g->node); + return ERR_PTR(err); +} + +static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) +{ + return ((ft->type == FS_FT_NIC_RX) && + (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs))); +} + +struct mlx5_flow_handle * +mlx5_add_flow_rules(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_flow_destination gen_dest = {}; + struct mlx5_flow_table *next_ft = NULL; + struct mlx5_flow_handle *handle = NULL; + u32 sw_action = flow_act->action; + struct fs_prio *prio; + + fs_get_obj(prio, ft->node.parent); + if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (!fwd_next_prio_supported(ft)) + return ERR_PTR(-EOPNOTSUPP); + if (num_dest) + return ERR_PTR(-EINVAL); + mutex_lock(&root->chain_lock); + next_ft = find_next_chained_ft(prio); + if (next_ft) { + gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + gen_dest.ft = next_ft; + dest = &gen_dest; + num_dest = 1; + flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else { + mutex_unlock(&root->chain_lock); + return ERR_PTR(-EOPNOTSUPP); + } + } + + handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest); + + if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (!IS_ERR_OR_NULL(handle) && + (list_empty(&handle->rule[0]->next_ft))) { + mutex_lock(&next_ft->lock); + list_add(&handle->rule[0]->next_ft, + &next_ft->fwd_rules); + mutex_unlock(&next_ft->lock); + handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + } + mutex_unlock(&root->chain_lock); + } + return handle; +} +EXPORT_SYMBOL(mlx5_add_flow_rules); + +void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) +{ + int i; + + for (i = handle->num_rules - 1; i >= 0; i--) + tree_remove_node(&handle->rule[i]->node); + kfree(handle); +} +EXPORT_SYMBOL(mlx5_del_flow_rules); + +/* Assuming prio->node.children(flow tables) is sorted by level */ +static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) +{ + struct fs_prio *prio; + + fs_get_obj(prio, ft->node.parent); + + if (!list_is_last(&ft->node.list, &prio->node.children)) + return list_next_entry(ft, node.list); + return find_next_chained_ft(prio); +} + +static int update_root_ft_destroy(struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_ft_underlay_qp *uqp; + struct mlx5_flow_table *new_root_ft = NULL; + int err = 0; + u32 qpn; + + if (root->root_ft != ft) + return 0; + + new_root_ft = find_next_ft(ft); + if (!new_root_ft) { + root->root_ft = NULL; + return 0; + } + + if (list_empty(&root->underlay_qpns)) { + /* Don't set any QPN (zero) in case QPN list is empty */ + qpn = 0; + err = root->cmds->update_root_ft(root->dev, new_root_ft, + qpn, false); + } else { + list_for_each_entry(uqp, &root->underlay_qpns, list) { + qpn = uqp->qpn; + err = root->cmds->update_root_ft(root->dev, + new_root_ft, qpn, + false); + if (err) + break; + } + } + + if (err) + mlx5_core_warn(root->dev, + "Update root flow table of id(%u) qpn(%d) failed\n", + ft->id, qpn); + else + root->root_ft = new_root_ft; + + return 0; +} + +/* Connect flow table from previous priority to + * the next flow table. + */ +static int disconnect_flow_table(struct mlx5_flow_table *ft) +{ + struct mlx5_core_dev *dev = get_dev(&ft->node); + struct mlx5_flow_table *next_ft; + struct fs_prio *prio; + int err = 0; + + err = update_root_ft_destroy(ft); + if (err) + return err; + + fs_get_obj(prio, ft->node.parent); + if (!(list_first_entry(&prio->node.children, + struct mlx5_flow_table, + node.list) == ft)) + return 0; + + next_ft = find_next_ft(ft); + err = connect_fwd_rules(dev, next_ft, ft); + if (err) + return err; + + err = connect_prev_fts(dev, next_ft, prio); + if (err) + mlx5_core_warn(dev, "Failed to disconnect flow table %d\n", + ft->id); + return err; +} + +int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + int err = 0; + + mutex_lock(&root->chain_lock); + err = disconnect_flow_table(ft); + if (err) { + mutex_unlock(&root->chain_lock); + return err; + } + if (tree_remove_node(&ft->node)) + mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n", + ft->id); + mutex_unlock(&root->chain_lock); + + return err; +} +EXPORT_SYMBOL(mlx5_destroy_flow_table); + +void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) +{ + if (tree_remove_node(&fg->node)) + mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n", + fg->id); +} + +struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + struct mlx5_flow_root_namespace *root_ns; + int prio; + struct fs_prio *fs_prio; + struct mlx5_flow_namespace *ns; + + if (!steering) + return NULL; + + switch (type) { + case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_LAG: + case MLX5_FLOW_NAMESPACE_OFFLOADS: + case MLX5_FLOW_NAMESPACE_ETHTOOL: + case MLX5_FLOW_NAMESPACE_KERNEL: + case MLX5_FLOW_NAMESPACE_LEFTOVERS: + case MLX5_FLOW_NAMESPACE_ANCHOR: + prio = type; + break; + case MLX5_FLOW_NAMESPACE_FDB: + if (steering->fdb_root_ns) + return &steering->fdb_root_ns->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_SNIFFER_RX: + if (steering->sniffer_rx_root_ns) + return &steering->sniffer_rx_root_ns->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_SNIFFER_TX: + if (steering->sniffer_tx_root_ns) + return &steering->sniffer_tx_root_ns->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_EGRESS: + if (steering->egress_root_ns) + return &steering->egress_root_ns->ns; + else + return NULL; + default: + return NULL; + } + + root_ns = steering->root_ns; + if (!root_ns) + return NULL; + + fs_prio = find_prio(&root_ns->ns, prio); + if (!fs_prio) + return NULL; + + ns = list_first_entry(&fs_prio->node.children, + typeof(*ns), + node.list); + + return ns; +} +EXPORT_SYMBOL(mlx5_get_flow_namespace); + +struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type, + int vport) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + if (!steering || vport >= MLX5_TOTAL_VPORTS(dev)) + return NULL; + + switch (type) { + case MLX5_FLOW_NAMESPACE_ESW_EGRESS: + if (steering->esw_egress_root_ns && + steering->esw_egress_root_ns[vport]) + return &steering->esw_egress_root_ns[vport]->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + if (steering->esw_ingress_root_ns && + steering->esw_ingress_root_ns[vport]) + return &steering->esw_ingress_root_ns[vport]->ns; + else + return NULL; + default: + return NULL; + } +} + +static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, int num_levels) +{ + struct fs_prio *fs_prio; + + fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL); + if (!fs_prio) + return ERR_PTR(-ENOMEM); + + fs_prio->node.type = FS_TYPE_PRIO; + tree_init_node(&fs_prio->node, NULL, del_sw_prio); + tree_add_node(&fs_prio->node, &ns->node); + fs_prio->num_levels = num_levels; + fs_prio->prio = prio; + list_add_tail(&fs_prio->node.list, &ns->node.children); + + return fs_prio; +} + +static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace + *ns) +{ + ns->node.type = FS_TYPE_NAMESPACE; + + return ns; +} + +static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) +{ + struct mlx5_flow_namespace *ns; + + ns = kzalloc(sizeof(*ns), GFP_KERNEL); + if (!ns) + return ERR_PTR(-ENOMEM); + + fs_init_namespace(ns); + tree_init_node(&ns->node, NULL, del_sw_ns); + tree_add_node(&ns->node, &prio->node); + list_add_tail(&ns->node.list, &prio->node.children); + + return ns; +} + +static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio, + struct init_tree_node *prio_metadata) +{ + struct fs_prio *fs_prio; + int i; + + for (i = 0; i < prio_metadata->num_leaf_prios; i++) { + fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + } + return 0; +} + +#define FLOW_TABLE_BIT_SZ 1 +#define GET_FLOW_TABLE_CAP(dev, offset) \ + ((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) + \ + offset / 32)) >> \ + (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ) +static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) +{ + int i; + + for (i = 0; i < caps->arr_sz; i++) { + if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i])) + return false; + } + return true; +} + +static int init_root_tree_recursive(struct mlx5_flow_steering *steering, + struct init_tree_node *init_node, + struct fs_node *fs_parent_node, + struct init_tree_node *init_parent_node, + int prio) +{ + int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev, + flow_table_properties_nic_receive. + max_ft_level); + struct mlx5_flow_namespace *fs_ns; + struct fs_prio *fs_prio; + struct fs_node *base; + int i; + int err; + + if (init_node->type == FS_TYPE_PRIO) { + if ((init_node->min_ft_level > max_ft_level) || + !has_required_caps(steering->dev, &init_node->caps)) + return 0; + + fs_get_obj(fs_ns, fs_parent_node); + if (init_node->num_leaf_prios) + return create_leaf_prios(fs_ns, prio, init_node); + fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + base = &fs_prio->node; + } else if (init_node->type == FS_TYPE_NAMESPACE) { + fs_get_obj(fs_prio, fs_parent_node); + fs_ns = fs_create_namespace(fs_prio); + if (IS_ERR(fs_ns)) + return PTR_ERR(fs_ns); + base = &fs_ns->node; + } else { + return -EINVAL; + } + prio = 0; + for (i = 0; i < init_node->ar_size; i++) { + err = init_root_tree_recursive(steering, &init_node->children[i], + base, init_node, prio); + if (err) + return err; + if (init_node->children[i].type == FS_TYPE_PRIO && + init_node->children[i].num_leaf_prios) { + prio += init_node->children[i].num_leaf_prios; + } + } + + return 0; +} + +static int init_root_tree(struct mlx5_flow_steering *steering, + struct init_tree_node *init_node, + struct fs_node *fs_parent_node) +{ + int i; + struct mlx5_flow_namespace *fs_ns; + int err; + + fs_get_obj(fs_ns, fs_parent_node); + for (i = 0; i < init_node->ar_size; i++) { + err = init_root_tree_recursive(steering, &init_node->children[i], + &fs_ns->node, + init_node, i); + if (err) + return err; + } + return 0; +} + +static struct mlx5_flow_root_namespace +*create_root_ns(struct mlx5_flow_steering *steering, + enum fs_flow_table_type table_type) +{ + const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type); + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_namespace *ns; + + if (mlx5_accel_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE && + (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX)) + cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type); + + /* Create the root namespace */ + root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL); + if (!root_ns) + return NULL; + + root_ns->dev = steering->dev; + root_ns->table_type = table_type; + root_ns->cmds = cmds; + + INIT_LIST_HEAD(&root_ns->underlay_qpns); + + ns = &root_ns->ns; + fs_init_namespace(ns); + mutex_init(&root_ns->chain_lock); + tree_init_node(&ns->node, NULL, NULL); + tree_add_node(&ns->node, NULL); + + return root_ns; +} + +static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level); + +static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level) +{ + struct fs_prio *prio; + + fs_for_each_prio(prio, ns) { + /* This updates prio start_level and num_levels */ + set_prio_attrs_in_prio(prio, acc_level); + acc_level += prio->num_levels; + } + return acc_level; +} + +static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level) +{ + struct mlx5_flow_namespace *ns; + int acc_level_ns = acc_level; + + prio->start_level = acc_level; + fs_for_each_ns(ns, prio) + /* This updates start_level and num_levels of ns's priority descendants */ + acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); + if (!prio->num_levels) + prio->num_levels = acc_level_ns - prio->start_level; + WARN_ON(prio->num_levels < acc_level_ns - prio->start_level); +} + +static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) +{ + struct mlx5_flow_namespace *ns = &root_ns->ns; + struct fs_prio *prio; + int start_level = 0; + + fs_for_each_prio(prio, ns) { + set_prio_attrs_in_prio(prio, start_level); + start_level += prio->num_levels; + } +} + +#define ANCHOR_PRIO 0 +#define ANCHOR_SIZE 1 +#define ANCHOR_LEVEL 0 +static int create_anchor_flow_table(struct mlx5_flow_steering *steering) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *ft; + + ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); + if (WARN_ON(!ns)) + return -EINVAL; + + ft_attr.max_fte = ANCHOR_SIZE; + ft_attr.level = ANCHOR_LEVEL; + ft_attr.prio = ANCHOR_PRIO; + + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + mlx5_core_err(steering->dev, "Failed to create last anchor flow table"); + return PTR_ERR(ft); + } + return 0; +} + +static int init_root_ns(struct mlx5_flow_steering *steering) +{ + int err; + + steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); + if (!steering->root_ns) + return -ENOMEM; + + err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node); + if (err) + goto out_err; + + set_prio_attrs(steering->root_ns); + err = create_anchor_flow_table(steering); + if (err) + goto out_err; + + return 0; + +out_err: + cleanup_root_ns(steering->root_ns); + steering->root_ns = NULL; + return err; +} + +static void clean_tree(struct fs_node *node) +{ + if (node) { + struct fs_node *iter; + struct fs_node *temp; + + tree_get_node(node); + list_for_each_entry_safe(iter, temp, &node->children, list) + clean_tree(iter); + tree_put_node(node); + tree_remove_node(node); + } +} + +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns) +{ + if (!root_ns) + return; + + clean_tree(&root_ns->ns.node); +} + +static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int i; + + if (!steering->esw_egress_root_ns) + return; + + for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) + cleanup_root_ns(steering->esw_egress_root_ns[i]); + + kfree(steering->esw_egress_root_ns); + steering->esw_egress_root_ns = NULL; +} + +static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int i; + + if (!steering->esw_ingress_root_ns) + return; + + for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) + cleanup_root_ns(steering->esw_ingress_root_ns[i]); + + kfree(steering->esw_ingress_root_ns); + steering->esw_ingress_root_ns = NULL; +} + +void mlx5_cleanup_fs(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + cleanup_root_ns(steering->root_ns); + cleanup_egress_acls_root_ns(dev); + cleanup_ingress_acls_root_ns(dev); + cleanup_root_ns(steering->fdb_root_ns); + cleanup_root_ns(steering->sniffer_rx_root_ns); + cleanup_root_ns(steering->sniffer_tx_root_ns); + cleanup_root_ns(steering->egress_root_ns); + mlx5_cleanup_fc_stats(dev); + kmem_cache_destroy(steering->ftes_cache); + kmem_cache_destroy(steering->fgs_cache); + kfree(steering); +} + +static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX); + if (!steering->sniffer_tx_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1); + if (IS_ERR(prio)) { + cleanup_root_ns(steering->sniffer_tx_root_ns); + return PTR_ERR(prio); + } + return 0; +} + +static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX); + if (!steering->sniffer_rx_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1); + if (IS_ERR(prio)) { + cleanup_root_ns(steering->sniffer_rx_root_ns); + return PTR_ERR(prio); + } + return 0; +} + +static int init_fdb_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); + if (!steering->fdb_root_ns) + return -ENOMEM; + + prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 2); + if (IS_ERR(prio)) + goto out_err; + + prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); + if (IS_ERR(prio)) + goto out_err; + + set_prio_attrs(steering->fdb_root_ns); + return 0; + +out_err: + cleanup_root_ns(steering->fdb_root_ns); + steering->fdb_root_ns = NULL; + return PTR_ERR(prio); +} + +static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) +{ + struct fs_prio *prio; + + steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL); + if (!steering->esw_egress_root_ns[vport]) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} + +static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) +{ + struct fs_prio *prio; + + steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL); + if (!steering->esw_ingress_root_ns[vport]) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} + +static int init_egress_acls_root_ns(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int err; + int i; + + steering->esw_egress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev), + sizeof(*steering->esw_egress_root_ns), + GFP_KERNEL); + if (!steering->esw_egress_root_ns) + return -ENOMEM; + + for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) { + err = init_egress_acl_root_ns(steering, i); + if (err) + goto cleanup_root_ns; + } + + return 0; + +cleanup_root_ns: + for (i--; i >= 0; i--) + cleanup_root_ns(steering->esw_egress_root_ns[i]); + kfree(steering->esw_egress_root_ns); + steering->esw_egress_root_ns = NULL; + return err; +} + +static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int err; + int i; + + steering->esw_ingress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev), + sizeof(*steering->esw_ingress_root_ns), + GFP_KERNEL); + if (!steering->esw_ingress_root_ns) + return -ENOMEM; + + for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) { + err = init_ingress_acl_root_ns(steering, i); + if (err) + goto cleanup_root_ns; + } + + return 0; + +cleanup_root_ns: + for (i--; i >= 0; i--) + cleanup_root_ns(steering->esw_ingress_root_ns[i]); + kfree(steering->esw_ingress_root_ns); + steering->esw_ingress_root_ns = NULL; + return err; +} + +static int init_egress_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->egress_root_ns = create_root_ns(steering, + FS_FT_NIC_TX); + if (!steering->egress_root_ns) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} + +int mlx5_init_fs(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering; + int err = 0; + + err = mlx5_init_fc_stats(dev); + if (err) + return err; + + steering = kzalloc(sizeof(*steering), GFP_KERNEL); + if (!steering) + return -ENOMEM; + steering->dev = dev; + dev->priv.steering = steering; + + steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", + sizeof(struct mlx5_flow_group), 0, + 0, NULL); + steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, + 0, NULL); + if (!steering->ftes_cache || !steering->fgs_cache) { + err = -ENOMEM; + goto err; + } + + if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && + (MLX5_CAP_GEN(dev, nic_flow_table))) || + ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) && + MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { + err = init_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_ESWITCH_MANAGER(dev)) { + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { + err = init_fdb_root_ns(steering); + if (err) + goto err; + } + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { + err = init_egress_acls_root_ns(dev); + if (err) + goto err; + } + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { + err = init_ingress_acls_root_ns(dev); + if (err) + goto err; + } + } + + if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) { + err = init_sniffer_rx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) { + err = init_sniffer_tx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_IPSEC_DEV(dev)) { + err = init_egress_root_ns(steering); + if (err) + goto err; + } + + return 0; +err: + mlx5_cleanup_fs(dev); + return err; +} + +int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) +{ + struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + struct mlx5_ft_underlay_qp *new_uqp; + int err = 0; + + new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL); + if (!new_uqp) + return -ENOMEM; + + mutex_lock(&root->chain_lock); + + if (!root->root_ft) { + err = -EINVAL; + goto update_ft_fail; + } + + err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn, + false); + if (err) { + mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n", + underlay_qpn, err); + goto update_ft_fail; + } + + new_uqp->qpn = underlay_qpn; + list_add_tail(&new_uqp->list, &root->underlay_qpns); + + mutex_unlock(&root->chain_lock); + + return 0; + +update_ft_fail: + mutex_unlock(&root->chain_lock); + kfree(new_uqp); + return err; +} +EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn); + +int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) +{ + struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + struct mlx5_ft_underlay_qp *uqp; + bool found = false; + int err = 0; + + mutex_lock(&root->chain_lock); + list_for_each_entry(uqp, &root->underlay_qpns, list) { + if (uqp->qpn == underlay_qpn) { + found = true; + break; + } + } + + if (!found) { + mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n", + underlay_qpn); + err = -EINVAL; + goto out; + } + + err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn, + true); + if (err) + mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n", + underlay_qpn, err); + + list_del(&uqp->list); + mutex_unlock(&root->chain_lock); + kfree(uqp); + + return 0; + +out: + mutex_unlock(&root->chain_lock); + return err; +} +EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h new file mode 100644 index 000000000..ba62fbce2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX5_FS_CORE_ +#define _MLX5_FS_CORE_ + +#include <linux/refcount.h> +#include <linux/mlx5/fs.h> +#include <linux/rhashtable.h> + +enum fs_node_type { + FS_TYPE_NAMESPACE, + FS_TYPE_PRIO, + FS_TYPE_FLOW_TABLE, + FS_TYPE_FLOW_GROUP, + FS_TYPE_FLOW_ENTRY, + FS_TYPE_FLOW_DEST +}; + +enum fs_flow_table_type { + FS_FT_NIC_RX = 0x0, + FS_FT_NIC_TX = 0x1, + FS_FT_ESW_EGRESS_ACL = 0x2, + FS_FT_ESW_INGRESS_ACL = 0x3, + FS_FT_FDB = 0X4, + FS_FT_SNIFFER_RX = 0X5, + FS_FT_SNIFFER_TX = 0X6, + FS_FT_MAX_TYPE = FS_FT_SNIFFER_TX, +}; + +enum fs_flow_table_op_mod { + FS_FT_OP_MOD_NORMAL, + FS_FT_OP_MOD_LAG_DEMUX, +}; + +enum fs_fte_status { + FS_FTE_STATUS_EXISTING = 1UL << 0, +}; + +struct mlx5_flow_steering { + struct mlx5_core_dev *dev; + struct kmem_cache *fgs_cache; + struct kmem_cache *ftes_cache; + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_root_namespace *fdb_root_ns; + struct mlx5_flow_root_namespace **esw_egress_root_ns; + struct mlx5_flow_root_namespace **esw_ingress_root_ns; + struct mlx5_flow_root_namespace *sniffer_tx_root_ns; + struct mlx5_flow_root_namespace *sniffer_rx_root_ns; + struct mlx5_flow_root_namespace *egress_root_ns; +}; + +struct fs_node { + struct list_head list; + struct list_head children; + enum fs_node_type type; + struct fs_node *parent; + struct fs_node *root; + /* lock the node for writing and traversing */ + struct rw_semaphore lock; + refcount_t refcount; + bool active; + void (*del_hw_func)(struct fs_node *); + void (*del_sw_func)(struct fs_node *); + atomic_t version; +}; + +struct mlx5_flow_rule { + struct fs_node node; + struct mlx5_flow_destination dest_attr; + /* next_ft should be accessed under chain_lock and only of + * destination type is FWD_NEXT_fT. + */ + struct list_head next_ft; + u32 sw_action; +}; + +struct mlx5_flow_handle { + int num_rules; + struct mlx5_flow_rule *rule[]; +}; + +/* Type of children is mlx5_flow_group */ +struct mlx5_flow_table { + struct fs_node node; + u32 id; + u16 vport; + unsigned int max_fte; + unsigned int level; + enum fs_flow_table_type type; + enum fs_flow_table_op_mod op_mod; + struct { + bool active; + unsigned int required_groups; + unsigned int group_size; + unsigned int num_groups; + } autogroup; + /* Protect fwd_rules */ + struct mutex lock; + /* FWD rules that point on this flow table */ + struct list_head fwd_rules; + u32 flags; + struct rhltable fgs_hash; +}; + +struct mlx5_fc_cache { + u64 packets; + u64 bytes; + u64 lastuse; +}; + +struct mlx5_fc { + struct rb_node node; + struct list_head list; + + /* last{packets,bytes} members are used when calculating the delta since + * last reading + */ + u64 lastpackets; + u64 lastbytes; + + u32 id; + bool deleted; + bool aging; + + struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; +}; + +struct mlx5_ft_underlay_qp { + struct list_head list; + u32 qpn; +}; + +#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_800 +/* Calculate the fte_match_param length and without the reserved length. + * Make sure the reserved field is the last. + */ +#define MLX5_ST_SZ_DW_MATCH_PARAM \ + ((MLX5_BYTE_OFF(fte_match_param, MLX5_FTE_MATCH_PARAM_RESERVED) / sizeof(u32)) + \ + BUILD_BUG_ON_ZERO(MLX5_ST_SZ_BYTES(fte_match_param) != \ + MLX5_FLD_SZ_BYTES(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED) +\ + MLX5_BYTE_OFF(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED))) + +/* Type of children is mlx5_flow_rule */ +struct fs_fte { + struct fs_node node; + u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; + u32 dests_size; + u32 index; + struct mlx5_flow_act action; + enum fs_fte_status status; + struct mlx5_fc *counter; + struct rhash_head hash; +}; + +/* Type of children is mlx5_flow_table/namespace */ +struct fs_prio { + struct fs_node node; + unsigned int num_levels; + unsigned int start_level; + unsigned int prio; + unsigned int num_ft; +}; + +/* Type of children is fs_prio */ +struct mlx5_flow_namespace { + /* parent == NULL => root ns */ + struct fs_node node; +}; + +struct mlx5_flow_group_mask { + u8 match_criteria_enable; + u32 match_criteria[MLX5_ST_SZ_DW_MATCH_PARAM]; +}; + +/* Type of children is fs_fte */ +struct mlx5_flow_group { + struct fs_node node; + struct mlx5_flow_group_mask mask; + u32 start_index; + u32 max_ftes; + struct ida fte_allocator; + u32 id; + struct rhashtable ftes_hash; + struct rhlist_head hash; +}; + +struct mlx5_flow_root_namespace { + struct mlx5_flow_namespace ns; + enum fs_flow_table_type table_type; + struct mlx5_core_dev *dev; + struct mlx5_flow_table *root_ft; + /* Should be held when chaining flow tables */ + struct mutex chain_lock; + struct list_head underlay_qpns; + const struct mlx5_flow_cmds *cmds; +}; + +int mlx5_init_fc_stats(struct mlx5_core_dev *dev); +void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev); +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay); +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval); + +int mlx5_init_fs(struct mlx5_core_dev *dev); +void mlx5_cleanup_fs(struct mlx5_core_dev *dev); + +#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } + +#define fs_list_for_each_entry(pos, root) \ + list_for_each_entry(pos, root, node.list) + +#define fs_list_for_each_entry_safe(pos, tmp, root) \ + list_for_each_entry_safe(pos, tmp, root, node.list) + +#define fs_for_each_ns_or_ft_reverse(pos, prio) \ + list_for_each_entry_reverse(pos, &(prio)->node.children, list) + +#define fs_for_each_ns_or_ft(pos, prio) \ + list_for_each_entry(pos, (&(prio)->node.children), list) + +#define fs_for_each_prio(pos, ns) \ + fs_list_for_each_entry(pos, &(ns)->node.children) + +#define fs_for_each_ns(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + +#define fs_for_each_ft(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + +#define fs_for_each_ft_safe(pos, tmp, prio) \ + fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children) + +#define fs_for_each_fg(pos, ft) \ + fs_list_for_each_entry(pos, &(ft)->node.children) + +#define fs_for_each_fte(pos, fg) \ + fs_list_for_each_entry(pos, &(fg)->node.children) + +#define fs_for_each_dst(pos, fte) \ + fs_list_for_each_entry(pos, &(fte)->node.children) + +#define MLX5_CAP_FLOWTABLE_TYPE(mdev, cap, type) ( \ + (type == FS_FT_NIC_RX) ? MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) : \ + (type == FS_FT_ESW_EGRESS_ACL) ? MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) : \ + (type == FS_FT_ESW_INGRESS_ACL) ? MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) : \ + (type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \ + (type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) : \ + (type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \ + (BUILD_BUG_ON_ZERO(FS_FT_SNIFFER_TX != FS_FT_MAX_TYPE))\ + ) + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c new file mode 100644 index 000000000..808ddd732 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -0,0 +1,360 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/fs.h> +#include <linux/rbtree.h> +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" + +#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) +/* Max number of counters to query in bulk read is 32K */ +#define MLX5_SW_MAX_COUNTERS_BULK BIT(15) + +/* locking scheme: + * + * It is the responsibility of the user to prevent concurrent calls or bad + * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference + * to struct mlx5_fc. + * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a + * dump (access to struct mlx5_fc) after a counter is destroyed. + * + * access to counter list: + * - create (user context) + * - mlx5_fc_create() only adds to an addlist to be used by + * mlx5_fc_stats_query_work(). addlist is protected by a spinlock. + * - spawn thread to do the actual destroy + * + * - destroy (user context) + * - mark a counter as deleted + * - spawn thread to do the actual del + * + * - dump (user context) + * user should not call dump after destroy + * + * - query (single thread workqueue context) + * destroy/dump - no conflict (see destroy) + * query/dump - packets and bytes might be inconsistent (since update is not + * atomic) + * query/create - no conflict (see create) + * since every create/destroy spawn the work, only after necessary time has + * elapsed, the thread will actually query the hardware. + */ + +static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter) +{ + struct rb_node **new = &root->rb_node; + struct rb_node *parent = NULL; + + while (*new) { + struct mlx5_fc *this = rb_entry(*new, struct mlx5_fc, node); + int result = counter->id - this->id; + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&counter->node, parent, new); + rb_insert_color(&counter->node, root); +} + +/* The function returns the last node that was queried so the caller + * function can continue calling it till all counters are queried. + */ +static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, + struct mlx5_fc *first, + u32 last_id) +{ + struct mlx5_cmd_fc_bulk *b; + struct rb_node *node = NULL; + u32 afirst_id; + int num; + int err; + + int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); + + /* first id must be aligned to 4 when using bulk query */ + afirst_id = first->id & ~0x3; + + /* number of counters to query inc. the last counter */ + num = ALIGN(last_id - afirst_id + 1, 4); + if (num > max_bulk) { + num = max_bulk; + last_id = afirst_id + num - 1; + } + + b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num); + if (!b) { + mlx5_core_err(dev, "Error allocating resources for bulk query\n"); + return NULL; + } + + err = mlx5_cmd_fc_bulk_query(dev, b); + if (err) { + mlx5_core_err(dev, "Error doing bulk query: %d\n", err); + goto out; + } + + for (node = &first->node; node; node = rb_next(node)) { + struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node); + struct mlx5_fc_cache *c = &counter->cache; + u64 packets; + u64 bytes; + + if (counter->id > last_id) + break; + + mlx5_cmd_fc_bulk_get(dev, b, + counter->id, &packets, &bytes); + + if (c->packets == packets) + continue; + + c->packets = packets; + c->bytes = bytes; + c->lastuse = jiffies; + } + +out: + mlx5_cmd_fc_bulk_free(b); + + return node; +} + +static void mlx5_fc_stats_work(struct work_struct *work) +{ + struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, + priv.fc_stats.work.work); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + unsigned long now = jiffies; + struct mlx5_fc *counter = NULL; + struct mlx5_fc *last = NULL; + struct rb_node *node; + LIST_HEAD(tmplist); + + spin_lock(&fc_stats->addlist_lock); + + list_splice_tail_init(&fc_stats->addlist, &tmplist); + + if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters)) + queue_delayed_work(fc_stats->wq, &fc_stats->work, + fc_stats->sampling_interval); + + spin_unlock(&fc_stats->addlist_lock); + + list_for_each_entry(counter, &tmplist, list) + mlx5_fc_stats_insert(&fc_stats->counters, counter); + + node = rb_first(&fc_stats->counters); + while (node) { + counter = rb_entry(node, struct mlx5_fc, node); + + node = rb_next(node); + + if (counter->deleted) { + rb_erase(&counter->node, &fc_stats->counters); + + mlx5_cmd_fc_free(dev, counter->id); + + kfree(counter); + continue; + } + + last = counter; + } + + if (time_before(now, fc_stats->next_query) || !last) + return; + + node = rb_first(&fc_stats->counters); + while (node) { + counter = rb_entry(node, struct mlx5_fc, node); + + node = mlx5_fc_stats_query(dev, counter, last->id); + } + + fc_stats->next_query = now + fc_stats->sampling_interval; +} + +struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc *counter; + int err; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + + err = mlx5_cmd_fc_alloc(dev, &counter->id); + if (err) + goto err_out; + + if (aging) { + counter->cache.lastuse = jiffies; + counter->aging = true; + + spin_lock(&fc_stats->addlist_lock); + list_add(&counter->list, &fc_stats->addlist); + spin_unlock(&fc_stats->addlist_lock); + + mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); + } + + return counter; + +err_out: + kfree(counter); + + return ERR_PTR(err); +} +EXPORT_SYMBOL(mlx5_fc_create); + +void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (!counter) + return; + + if (counter->aging) { + counter->deleted = true; + mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); + return; + } + + mlx5_cmd_fc_free(dev, counter->id); + kfree(counter); +} +EXPORT_SYMBOL(mlx5_fc_destroy); + +int mlx5_init_fc_stats(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + fc_stats->counters = RB_ROOT; + INIT_LIST_HEAD(&fc_stats->addlist); + spin_lock_init(&fc_stats->addlist_lock); + + fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); + if (!fc_stats->wq) + return -ENOMEM; + + fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD; + INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); + + return 0; +} + +void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc *counter; + struct mlx5_fc *tmp; + struct rb_node *node; + + cancel_delayed_work_sync(&dev->priv.fc_stats.work); + destroy_workqueue(dev->priv.fc_stats.wq); + dev->priv.fc_stats.wq = NULL; + + list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) { + list_del(&counter->list); + + mlx5_cmd_fc_free(dev, counter->id); + + kfree(counter); + } + + node = rb_first(&fc_stats->counters); + while (node) { + counter = rb_entry(node, struct mlx5_fc, node); + + node = rb_next(node); + + rb_erase(&counter->node, &fc_stats->counters); + + mlx5_cmd_fc_free(dev, counter->id); + + kfree(counter); + } +} + +int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, + u64 *packets, u64 *bytes) +{ + return mlx5_cmd_fc_query(dev, counter->id, packets, bytes); +} +EXPORT_SYMBOL(mlx5_fc_query); + +u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter) +{ + return counter->cache.lastuse; +} + +void mlx5_fc_query_cached(struct mlx5_fc *counter, + u64 *bytes, u64 *packets, u64 *lastuse) +{ + struct mlx5_fc_cache c; + + c = counter->cache; + + *bytes = c.bytes - counter->lastbytes; + *packets = c.packets - counter->lastpackets; + *lastuse = c.lastuse; + + counter->lastbytes = c.bytes; + counter->lastpackets = c.packets; +} + +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + queue_delayed_work(fc_stats->wq, dwork, delay); +} + +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + fc_stats->sampling_interval = min_t(unsigned long, interval, + fc_stats->sampling_interval); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c new file mode 100644 index 000000000..41ad24f0d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -0,0 +1,527 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include <linux/mlx5/eswitch.h> +#include <linux/module.h> +#include "mlx5_core.h" +#include "../../mlxfw/mlxfw.h" + +static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {0}; + + MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_query_board_id(struct mlx5_core_dev *dev) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_adapter_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_cmd_query_adapter(dev, out, outlen); + if (err) + goto out; + + memcpy(dev->board_id, + MLX5_ADDR_OF(query_adapter_out, out, + query_adapter_struct.vsd_contd_psid), + MLX5_FLD_SZ_BYTES(query_adapter_out, + query_adapter_struct.vsd_contd_psid)); + +out: + kfree(out); + return err; +} + +int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_adapter_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_cmd_query_adapter(mdev, out, outlen); + if (err) + goto out; + + *vendor_id = MLX5_GET(query_adapter_out, out, + query_adapter_struct.ieee_vendor_id); +out: + kfree(out); + return err; +} +EXPORT_SYMBOL(mlx5_core_query_vendor_id); + +static int mlx5_get_pcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_pcam_reg(dev, dev->caps.pcam, + MLX5_PCAM_FEATURE_ENHANCED_FEATURES, + MLX5_PCAM_REGS_5000_TO_507F); +} + +static int mlx5_get_mcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_mcam_reg(dev, dev->caps.mcam, + MLX5_MCAM_FEATURE_ENHANCED_FEATURES, + MLX5_MCAM_REGS_FIRST_128); +} + +static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_qcam_reg(dev, dev->caps.qcam, + MLX5_QCAM_FEATURE_ENHANCED_FEATURES, + MLX5_QCAM_REGS_FIRST_128); +} + +int mlx5_query_hca_caps(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); + if (err) + return err; + + if (MLX5_CAP_GEN(dev, eth_net_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_IPOIB_ENHANCED_OFFLOADS); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, pg)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, atomic)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, roce)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_flow_table) || + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, vport_group_manager) && + MLX5_ESWITCH_MANAGER(dev)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE); + if (err) + return err; + } + + if (MLX5_ESWITCH_MANAGER(dev)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, vector_calc)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_VECTOR_CALC); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, qos)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_QOS); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, debug)) + mlx5_core_get_caps(dev, MLX5_CAP_DEBUG); + + if (MLX5_CAP_GEN(dev, pcam_reg)) + mlx5_get_pcam_reg(dev); + + if (MLX5_CAP_GEN(dev, mcam_reg)) + mlx5_get_mcam_reg(dev); + + if (MLX5_CAP_GEN(dev, qcam_reg)) + mlx5_get_qcam_reg(dev); + + if (MLX5_CAP_GEN(dev, device_memory)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_MEM); + if (err) + return err; + } + + return 0; +} + +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id) +{ + u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {0}; + int i; + + MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA); + + if (MLX5_CAP_GEN(dev, sw_owner_id)) { + for (i = 0; i < 4; i++) + MLX5_ARRAY_SET(init_hca_in, in, sw_owner_id, i, + sw_owner_id[i]); + } + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0}; + + MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0}; + int force_state; + int ret; + + if (!MLX5_CAP_GEN(dev, force_teardown)) { + mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + MLX5_SET(teardown_hca_in, in, profile, MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE); + + ret = mlx5_cmd_exec_polling(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + force_state = MLX5_GET(teardown_hca_out, out, force_state); + if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) { + mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n"); + return -EIO; + } + + return 0; +} + +enum mlxsw_reg_mcc_instruction { + MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01, + MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02, + MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT = 0x03, + MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT = 0x04, + MLX5_REG_MCC_INSTRUCTION_ACTIVATE = 0x06, + MLX5_REG_MCC_INSTRUCTION_CANCEL = 0x08, +}; + +static int mlx5_reg_mcc_set(struct mlx5_core_dev *dev, + enum mlxsw_reg_mcc_instruction instr, + u16 component_index, u32 update_handle, + u32 component_size) +{ + u32 out[MLX5_ST_SZ_DW(mcc_reg)]; + u32 in[MLX5_ST_SZ_DW(mcc_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(mcc_reg, in, instruction, instr); + MLX5_SET(mcc_reg, in, component_index, component_index); + MLX5_SET(mcc_reg, in, update_handle, update_handle); + MLX5_SET(mcc_reg, in, component_size, component_size); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCC, 0, 1); +} + +static int mlx5_reg_mcc_query(struct mlx5_core_dev *dev, + u32 *update_handle, u8 *error_code, + u8 *control_state) +{ + u32 out[MLX5_ST_SZ_DW(mcc_reg)]; + u32 in[MLX5_ST_SZ_DW(mcc_reg)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + MLX5_SET(mcc_reg, in, update_handle, *update_handle); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCC, 0, 0); + if (err) + goto out; + + *update_handle = MLX5_GET(mcc_reg, out, update_handle); + *error_code = MLX5_GET(mcc_reg, out, error_code); + *control_state = MLX5_GET(mcc_reg, out, control_state); + +out: + return err; +} + +static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev, + u32 update_handle, + u32 offset, u16 size, + u8 *data) +{ + int err, in_size = MLX5_ST_SZ_BYTES(mcda_reg) + size; + u32 out[MLX5_ST_SZ_DW(mcda_reg)]; + int i, j, dw_size = size >> 2; + __be32 data_element; + u32 *in; + + in = kzalloc(in_size, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(mcda_reg, in, update_handle, update_handle); + MLX5_SET(mcda_reg, in, offset, offset); + MLX5_SET(mcda_reg, in, size, size); + + for (i = 0; i < dw_size; i++) { + j = i * 4; + data_element = htonl(*(u32 *)&data[j]); + memcpy(MLX5_ADDR_OF(mcda_reg, in, data) + j, &data_element, 4); + } + + err = mlx5_core_access_reg(dev, in, in_size, out, + sizeof(out), MLX5_REG_MCDA, 0, 1); + kfree(in); + return err; +} + +static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev, + u16 component_index, + u32 *max_component_size, + u8 *log_mcda_word_size, + u16 *mcda_max_write_size) +{ + u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_ST_SZ_DW(mcqi_cap)]; + int offset = MLX5_ST_SZ_DW(mcqi_reg); + u32 in[MLX5_ST_SZ_DW(mcqi_reg)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(mcqi_reg, in, component_index, component_index); + MLX5_SET(mcqi_reg, in, data_size, MLX5_ST_SZ_BYTES(mcqi_cap)); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCQI, 0, 0); + if (err) + goto out; + + *max_component_size = MLX5_GET(mcqi_cap, out + offset, max_component_size); + *log_mcda_word_size = MLX5_GET(mcqi_cap, out + offset, log_mcda_word_size); + *mcda_max_write_size = MLX5_GET(mcqi_cap, out + offset, mcda_max_write_size); + +out: + return err; +} + +struct mlx5_mlxfw_dev { + struct mlxfw_dev mlxfw_dev; + struct mlx5_core_dev *mlx5_core_dev; +}; + +static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev, + u16 component_index, u32 *p_max_size, + u8 *p_align_bits, u16 *p_max_write_size) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcqi_query(dev, component_index, p_max_size, + p_align_bits, p_max_write_size); +} + +static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u8 control_state, error_code; + int err; + + *fwhandle = 0; + err = mlx5_reg_mcc_query(dev, fwhandle, &error_code, &control_state); + if (err) + return err; + + if (control_state != MLXFW_FSM_STATE_IDLE) + return -EBUSY; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, + 0, *fwhandle, 0); +} + +static int mlx5_fsm_component_update(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index, u32 component_size) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, + component_index, fwhandle, component_size); +} + +static int mlx5_fsm_block_download(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u8 *data, u16 size, u32 offset) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcda_set(dev, fwhandle, offset, size, data); +} + +static int mlx5_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, + component_index, fwhandle, 0); +} + +static int mlx5_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_ACTIVATE, 0, + fwhandle, 0); +} + +static int mlx5_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u8 control_state, error_code; + int err; + + err = mlx5_reg_mcc_query(dev, &fwhandle, &error_code, &control_state); + if (err) + return err; + + *fsm_state = control_state; + *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code, + MLXFW_FSM_STATE_ERR_MAX); + return 0; +} + +static void mlx5_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_CANCEL, 0, fwhandle, 0); +} + +static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0, + fwhandle, 0); +} + +static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { + .component_query = mlx5_component_query, + .fsm_lock = mlx5_fsm_lock, + .fsm_component_update = mlx5_fsm_component_update, + .fsm_block_download = mlx5_fsm_block_download, + .fsm_component_verify = mlx5_fsm_component_verify, + .fsm_activate = mlx5_fsm_activate, + .fsm_query_state = mlx5_fsm_query_state, + .fsm_cancel = mlx5_fsm_cancel, + .fsm_release = mlx5_fsm_release +}; + +int mlx5_firmware_flash(struct mlx5_core_dev *dev, + const struct firmware *firmware) +{ + struct mlx5_mlxfw_dev mlx5_mlxfw_dev = { + .mlxfw_dev = { + .ops = &mlx5_mlxfw_dev_ops, + .psid = dev->board_id, + .psid_size = strlen(dev->board_id), + }, + .mlx5_core_dev = dev + }; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || + !MLX5_CAP_MCAM_REG(dev, mcqi) || + !MLX5_CAP_MCAM_REG(dev, mcc) || + !MLX5_CAP_MCAM_REG(dev, mcda)) { + pr_info("%s flashing isn't supported by the running FW\n", __func__); + return -EOPNOTSUPP; + } + + return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, firmware); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c new file mode 100644 index 000000000..9f39aeca8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/vmalloc.h> +#include <linux/hardirq.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +enum { + MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, + MAX_MISSES = 3, +}; + +enum { + MLX5_HEALTH_SYNDR_FW_ERR = 0x1, + MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7, + MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR = 0x8, + MLX5_HEALTH_SYNDR_CRC_ERR = 0x9, + MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa, + MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb, + MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc, + MLX5_HEALTH_SYNDR_EQ_ERR = 0xd, + MLX5_HEALTH_SYNDR_EQ_INV = 0xe, + MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf, + MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10 +}; + +enum { + MLX5_NIC_IFC_FULL = 0, + MLX5_NIC_IFC_DISABLED = 1, + MLX5_NIC_IFC_NO_DRAM_NIC = 2, + MLX5_NIC_IFC_INVALID = 3 +}; + +enum { + MLX5_DROP_NEW_HEALTH_WORK, + MLX5_DROP_NEW_RECOVERY_WORK, +}; + +static u8 get_nic_state(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; +} + +static void trigger_cmd_completions(struct mlx5_core_dev *dev) +{ + unsigned long flags; + u64 vector; + + /* wait for pending handlers to complete */ + synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD)); + spin_lock_irqsave(&dev->cmd.alloc_lock, flags); + vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); + if (!vector) + goto no_trig; + + vector |= MLX5_TRIGGERED_CMD_COMP; + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); + + mlx5_core_dbg(dev, "vector 0x%llx\n", vector); + mlx5_cmd_comp_handler(dev, vector, true); + return; + +no_trig: + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); +} + +static int in_fatal(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + + if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + return 1; + + if (ioread32be(&h->fw_ver) == 0xffffffff) + return 1; + + return 0; +} + +void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) +{ + mutex_lock(&dev->intf_state_mutex); + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto unlock; + + mlx5_core_err(dev, "start\n"); + if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + trigger_cmd_completions(dev); + } + + mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); + mlx5_core_err(dev, "end\n"); + +unlock: + mutex_unlock(&dev->intf_state_mutex); +} + +static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) +{ + u8 nic_interface = get_nic_state(dev); + + switch (nic_interface) { + case MLX5_NIC_IFC_FULL: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n"); + break; + + case MLX5_NIC_IFC_DISABLED: + mlx5_core_warn(dev, "starting teardown\n"); + break; + + case MLX5_NIC_IFC_NO_DRAM_NIC: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n"); + break; + default: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n", + nic_interface); + } + + mlx5_disable_device(dev); +} + +static void health_recover(struct work_struct *work) +{ + struct mlx5_core_health *health; + struct delayed_work *dwork; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + u8 nic_state; + + dwork = container_of(work, struct delayed_work, work); + health = container_of(dwork, struct mlx5_core_health, recover_work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + + nic_state = get_nic_state(dev); + if (nic_state == MLX5_NIC_IFC_INVALID) { + dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n"); + return; + } + + dev_err(&dev->pdev->dev, "starting health recovery flow\n"); + mlx5_recover_device(dev); +} + +/* How much time to wait until health resetting the driver (in msecs) */ +#define MLX5_RECOVERY_DELAY_MSECS 60000 +static void health_care(struct work_struct *work) +{ + unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS); + struct mlx5_core_health *health; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + unsigned long flags; + + health = container_of(work, struct mlx5_core_health, work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + mlx5_core_warn(dev, "handling bad device here\n"); + mlx5_handle_bad_state(dev); + + spin_lock_irqsave(&health->wq_lock, flags); + if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) + schedule_delayed_work(&health->recover_work, recover_delay); + else + dev_err(&dev->pdev->dev, + "new health works are not permitted at this stage\n"); + spin_unlock_irqrestore(&health->wq_lock, flags); +} + +static const char *hsynd_str(u8 synd) +{ + switch (synd) { + case MLX5_HEALTH_SYNDR_FW_ERR: + return "firmware internal error"; + case MLX5_HEALTH_SYNDR_IRISC_ERR: + return "irisc not responding"; + case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR: + return "unrecoverable hardware error"; + case MLX5_HEALTH_SYNDR_CRC_ERR: + return "firmware CRC error"; + case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR: + return "ICM fetch PCI error"; + case MLX5_HEALTH_SYNDR_HW_FTL_ERR: + return "HW fatal error\n"; + case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR: + return "async EQ buffer overrun"; + case MLX5_HEALTH_SYNDR_EQ_ERR: + return "EQ error"; + case MLX5_HEALTH_SYNDR_EQ_INV: + return "Invalid EQ referenced"; + case MLX5_HEALTH_SYNDR_FFSER_ERR: + return "FFSER error"; + case MLX5_HEALTH_SYNDR_HIGH_TEMP: + return "High temperature"; + default: + return "unrecognized error"; + } +} + +static void print_health_info(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + char fw_str[18]; + u32 fw; + int i; + + /* If the syndrome is 0, the device is OK and no need to print buffer */ + if (!ioread8(&h->synd)) + return; + + for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) + dev_err(&dev->pdev->dev, "assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i)); + + dev_err(&dev->pdev->dev, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr)); + dev_err(&dev->pdev->dev, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra)); + sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev)); + dev_err(&dev->pdev->dev, "fw_ver %s\n", fw_str); + dev_err(&dev->pdev->dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); + dev_err(&dev->pdev->dev, "irisc_index %d\n", ioread8(&h->irisc_index)); + dev_err(&dev->pdev->dev, "synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd))); + dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); + fw = ioread32be(&h->fw_ver); + dev_err(&dev->pdev->dev, "raw fw_ver 0x%08x\n", fw); +} + +static unsigned long get_next_poll_jiffies(void) +{ + unsigned long next; + + get_random_bytes(&next, sizeof(next)); + next %= HZ; + next += jiffies + MLX5_HEALTH_POLL_INTERVAL; + + return next; +} + +void mlx5_trigger_health_work(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; + + spin_lock_irqsave(&health->wq_lock, flags); + if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) + queue_work(health->wq, &health->work); + else + dev_err(&dev->pdev->dev, + "new health works are not permitted at this stage\n"); + spin_unlock_irqrestore(&health->wq_lock, flags); +} + +static void poll_health(struct timer_list *t) +{ + struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); + struct mlx5_core_health *health = &dev->priv.health; + u32 count; + + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto out; + + count = ioread32be(health->health_counter); + if (count == health->prev) + ++health->miss_counter; + else + health->miss_counter = 0; + + health->prev = count; + if (health->miss_counter == MAX_MISSES) { + dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n"); + print_health_info(dev); + } + + if (in_fatal(dev) && !health->sick) { + health->sick = true; + print_health_info(dev); + mlx5_trigger_health_work(dev); + } + +out: + mod_timer(&health->timer, get_next_poll_jiffies()); +} + +void mlx5_start_health_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + timer_setup(&health->timer, poll_health, 0); + health->sick = 0; + clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); + health->health = &dev->iseg->health; + health->health_counter = &dev->iseg->health_counter; + + health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL); + add_timer(&health->timer); +} + +void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) +{ + struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; + + if (disable_health) { + spin_lock_irqsave(&health->wq_lock, flags); + set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); + spin_unlock_irqrestore(&health->wq_lock, flags); + } + + del_timer_sync(&health->timer); +} + +void mlx5_drain_health_wq(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; + + spin_lock_irqsave(&health->wq_lock, flags); + set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); + spin_unlock_irqrestore(&health->wq_lock, flags); + cancel_delayed_work_sync(&health->recover_work); + cancel_work_sync(&health->work); +} + +void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; + + spin_lock_irqsave(&health->wq_lock, flags); + set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); + spin_unlock_irqrestore(&health->wq_lock, flags); + cancel_delayed_work_sync(&dev->priv.health.recover_work); +} + +void mlx5_health_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + destroy_workqueue(health->wq); +} + +int mlx5_health_init(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health; + char *name; + + health = &dev->priv.health; + name = kmalloc(64, GFP_KERNEL); + if (!name) + return -ENOMEM; + + strcpy(name, "mlx5_health"); + strcat(name, dev_name(&dev->pdev->dev)); + health->wq = create_singlethread_workqueue(name); + kfree(name); + if (!health->wq) + return -ENOMEM; + spin_lock_init(&health->wq_lock); + INIT_WORK(&health->work, health_care); + INIT_DELAYED_WORK(&health->recover_work, health_recover); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile new file mode 100644 index 000000000..d8e17110f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c new file mode 100644 index 000000000..90cb50fe1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" +#include "ipoib.h" + +static void mlx5i_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_drvinfo(priv, drvinfo); + strlcpy(drvinfo->driver, DRIVER_NAME "[ib_ipoib]", + sizeof(drvinfo->driver)); +} + +static void mlx5i_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_strings(priv, stringset, data); +} + +static int mlx5i_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_get_sset_count(priv, sset); +} + +static void mlx5i_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); +} + +static int mlx5i_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_set_ringparam(priv, param); +} + +static void mlx5i_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_ringparam(priv, param); +} + +static int mlx5i_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_set_channels(priv, ch); +} + +static void mlx5i_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +static int mlx5i_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal); +} + +static int mlx5i_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal); +} + +static int mlx5i_get_ts_info(struct net_device *netdev, + struct ethtool_ts_info *info) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_get_ts_info(priv, info); +} + +static int mlx5i_flash_device(struct net_device *netdev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + +enum mlx5_ptys_width { + MLX5_PTYS_WIDTH_1X = 1 << 0, + MLX5_PTYS_WIDTH_2X = 1 << 1, + MLX5_PTYS_WIDTH_4X = 1 << 2, + MLX5_PTYS_WIDTH_8X = 1 << 3, + MLX5_PTYS_WIDTH_12X = 1 << 4, +}; + +static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width) +{ + switch (width) { + case MLX5_PTYS_WIDTH_1X: return 1; + case MLX5_PTYS_WIDTH_2X: return 2; + case MLX5_PTYS_WIDTH_4X: return 4; + case MLX5_PTYS_WIDTH_8X: return 8; + case MLX5_PTYS_WIDTH_12X: return 12; + default: return -1; + } +} + +enum mlx5_ptys_rate { + MLX5_PTYS_RATE_SDR = 1 << 0, + MLX5_PTYS_RATE_DDR = 1 << 1, + MLX5_PTYS_RATE_QDR = 1 << 2, + MLX5_PTYS_RATE_FDR10 = 1 << 3, + MLX5_PTYS_RATE_FDR = 1 << 4, + MLX5_PTYS_RATE_EDR = 1 << 5, + MLX5_PTYS_RATE_HDR = 1 << 6, +}; + +static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate) +{ + switch (rate) { + case MLX5_PTYS_RATE_SDR: return 2500; + case MLX5_PTYS_RATE_DDR: return 5000; + case MLX5_PTYS_RATE_QDR: + case MLX5_PTYS_RATE_FDR10: return 10000; + case MLX5_PTYS_RATE_FDR: return 14000; + case MLX5_PTYS_RATE_EDR: return 25000; + case MLX5_PTYS_RATE_HDR: return 50000; + default: return -1; + } +} + +static int mlx5i_get_port_settings(struct net_device *netdev, + u16 *ib_link_width_oper, u16 *ib_proto_oper) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; + int ret; + + ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1); + if (ret) + return ret; + + *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); + *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + + return 0; +} + +static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) +{ + int rate, width; + + rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper); + if (rate < 0) + return -EINVAL; + width = mlx5_ptys_width_enum_to_int(ib_link_width_oper); + if (width < 0) + return -EINVAL; + + return rate * width; +} + +static int mlx5i_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + u16 ib_link_width_oper; + u16 ib_proto_oper; + int speed, ret; + + ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper); + if (ret) + return ret; + + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + + speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper); + if (speed < 0) + return -EINVAL; + + link_ksettings->base.duplex = DUPLEX_FULL; + link_ksettings->base.port = PORT_OTHER; + + link_ksettings->base.autoneg = AUTONEG_DISABLE; + + link_ksettings->base.speed = speed; + + return 0; +} + +const struct ethtool_ops mlx5i_ethtool_ops = { + .get_drvinfo = mlx5i_get_drvinfo, + .get_strings = mlx5i_get_strings, + .get_sset_count = mlx5i_get_sset_count, + .get_ethtool_stats = mlx5i_get_ethtool_stats, + .get_ringparam = mlx5i_get_ringparam, + .set_ringparam = mlx5i_set_ringparam, + .flash_device = mlx5i_flash_device, + .get_channels = mlx5i_get_channels, + .set_channels = mlx5i_set_channels, + .get_coalesce = mlx5i_get_coalesce, + .set_coalesce = mlx5i_set_coalesce, + .get_ts_info = mlx5i_get_ts_info, + .get_link_ksettings = mlx5i_get_link_ksettings, + .get_link = ethtool_op_get_link, +}; + +const struct ethtool_ops mlx5i_pkey_ethtool_ops = { + .get_drvinfo = mlx5i_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ts_info = mlx5i_get_ts_info, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c new file mode 100644 index 000000000..db6aafcce --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -0,0 +1,697 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/ib_verbs.h> +#include <linux/mlx5/fs.h> +#include "en.h" +#include "ipoib.h" + +#define IB_DEFAULT_Q_KEY 0xb1b +#define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9 + +static int mlx5i_open(struct net_device *netdev); +static int mlx5i_close(struct net_device *netdev); +static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu); + +static const struct net_device_ops mlx5i_netdev_ops = { + .ndo_open = mlx5i_open, + .ndo_stop = mlx5i_close, + .ndo_init = mlx5i_dev_init, + .ndo_uninit = mlx5i_dev_cleanup, + .ndo_change_mtu = mlx5i_change_mtu, + .ndo_do_ioctl = mlx5i_ioctl, +}; + +/* IPoIB mlx5 netdev profile */ +static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, false); + mlx5e_set_rq_type(mdev, params); + mlx5e_init_rq_type_params(mdev, params); + + /* RQ size in ipoib by default is 512 */ + params->log_rq_mtu_frames = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE; + + params->lro_en = false; + params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; +} + +/* Called directly after IPoIB netdevice was created to initialize SW structs */ +void mlx5i_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + u16 max_mtu; + + /* priv init */ + priv->mdev = mdev; + priv->netdev = netdev; + priv->profile = profile; + priv->ppriv = ppriv; + mutex_init(&priv->state_lock); + + mlx5_query_port_max_mtu(mdev, &max_mtu, 1); + netdev->mtu = max_mtu; + + mlx5e_build_nic_params(mdev, &priv->channels.params, + profile->max_nch(mdev), netdev->mtu); + mlx5i_build_nic_params(mdev, &priv->channels.params); + + mlx5e_timestamp_init(priv); + + /* netdev init */ + netdev->hw_features |= NETIF_F_SG; + netdev->hw_features |= NETIF_F_IP_CSUM; + netdev->hw_features |= NETIF_F_IPV6_CSUM; + netdev->hw_features |= NETIF_F_GRO; + netdev->hw_features |= NETIF_F_TSO; + netdev->hw_features |= NETIF_F_TSO6; + netdev->hw_features |= NETIF_F_RXCSUM; + netdev->hw_features |= NETIF_F_RXHASH; + + netdev->netdev_ops = &mlx5i_netdev_ops; + netdev->ethtool_ops = &mlx5i_ethtool_ops; +} + +/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ +static void mlx5i_cleanup(struct mlx5e_priv *priv) +{ + /* Do nothing .. */ +} + +int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_qp *qp = &ipriv->qp; + struct mlx5_qp_context *context; + int ret; + + /* QP states */ + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + context->pri_path.port = 1; + context->pri_path.pkey_index = cpu_to_be16(ipriv->pkey_index); + context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY); + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret); + goto err_qp_modify_to_err; + } + memset(context, 0, sizeof(*context)); + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret); + goto err_qp_modify_to_err; + } + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret); + goto err_qp_modify_to_err; + } + + kfree(context); + return 0; + +err_qp_modify_to_err: + mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, &context, qp); + kfree(context); + return ret; +} + +void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_qp_context context; + int err; + + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context, + &ipriv->qp); + if (err) + mlx5_core_err(mdev, "Failed to modify qp 2RST, err: %d\n", err); +} + +#define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2 + +int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) +{ + u32 *in = NULL; + void *addr_path; + int ret = 0; + int inlen; + void *qpc; + + inlen = MLX5_ST_SZ_BYTES(create_qp_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, + MLX5_QP_ENHANCED_ULP_STATELESS_MODE); + + addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, addr_path, vhca_port_num, 1); + MLX5_SET(ads, addr_path, grh, 1); + + ret = mlx5_core_create_qp(mdev, qp, in, inlen); + if (ret) { + mlx5_core_err(mdev, "Failed creating IPoIB QP err : %d\n", ret); + goto out; + } + +out: + kvfree(in); + return ret; +} + +void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) +{ + mlx5_core_destroy_qp(mdev, qp); +} + +static int mlx5i_init_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + int err; + + err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp); + if (err) { + mlx5_core_warn(priv->mdev, "create underlay QP failed, %d\n", err); + return err; + } + + err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); + if (err) { + mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); + goto err_destroy_underlay_qp; + } + + return 0; + +err_destroy_underlay_qp: + mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); + return err; +} + +static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5e_destroy_tis(priv->mdev, priv->tisn[0]); + mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); +} + +static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) +{ + struct ttc_params ttc_params = {}; + int tt, err; + + priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + + if (!priv->fs.ns) + return -EINVAL; + + err = mlx5e_arfs_create_tables(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n", + err); + priv->netdev->hw_features &= ~NETIF_F_NTUPLE; + } + + mlx5e_set_ttc_basic_params(priv, &ttc_params); + mlx5e_set_inner_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; + + err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); + if (err) { + netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } + + mlx5e_set_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; + + err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); + if (err) { + netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", + err); + goto err_destroy_inner_ttc_table; + } + + return 0; + +err_destroy_inner_ttc_table: + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); +err_destroy_arfs_tables: + mlx5e_arfs_destroy_tables(priv); + + return err; +} + +static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) +{ + mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); + mlx5e_arfs_destroy_tables(priv); +} + +static int mlx5i_init_rx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_indirect_rqt(priv); + if (err) + return err; + + err = mlx5e_create_direct_rqts(priv); + if (err) + goto err_destroy_indirect_rqts; + + err = mlx5e_create_indirect_tirs(priv); + if (err) + goto err_destroy_direct_rqts; + + err = mlx5e_create_direct_tirs(priv); + if (err) + goto err_destroy_indirect_tirs; + + err = mlx5i_create_flow_steering(priv); + if (err) + goto err_destroy_direct_tirs; + + return 0; + +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); +err_destroy_indirect_tirs: + mlx5e_destroy_indirect_tirs(priv); +err_destroy_direct_rqts: + mlx5e_destroy_direct_rqts(priv); +err_destroy_indirect_rqts: + mlx5e_destroy_rqt(priv, &priv->indir_rqt); + return err; +} + +static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) +{ + mlx5i_destroy_flow_steering(priv); + mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_indirect_tirs(priv); + mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); +} + +static const struct mlx5e_profile mlx5i_nic_profile = { + .init = mlx5i_init, + .cleanup = mlx5i_cleanup, + .init_tx = mlx5i_init_tx, + .cleanup_tx = mlx5i_cleanup_tx, + .init_rx = mlx5i_init_rx, + .cleanup_rx = mlx5i_cleanup_rx, + .enable = NULL, /* mlx5i_enable */ + .disable = NULL, /* mlx5i_disable */ + .update_stats = NULL, /* mlx5i_update_stats */ + .max_nch = mlx5e_get_max_num_channels, + .update_carrier = NULL, /* no HW update in IB link */ + .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, + .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ + .max_tc = MLX5I_MAX_NUM_TC, +}; + +/* mlx5i netdev NDos */ + +static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_channels new_channels = {}; + struct mlx5e_params *params; + int err = 0; + + mutex_lock(&priv->state_lock); + + params = &priv->channels.params; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + params->sw_mtu = new_mtu; + netdev->mtu = params->sw_mtu; + goto out; + } + + new_channels.params = *params; + new_channels.params.sw_mtu = new_mtu; + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + netdev->mtu = new_channels.params.sw_mtu; + +out: + mutex_unlock(&priv->state_lock); + return err; +} + +int mlx5i_dev_init(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = priv->ppriv; + + /* Set dev address using underlay QP */ + dev->dev_addr[1] = (ipriv->qp.qpn >> 16) & 0xff; + dev->dev_addr[2] = (ipriv->qp.qpn >> 8) & 0xff; + dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff; + + /* Add QPN to net-device mapping to HT */ + mlx5i_pkey_add_qpn(dev ,ipriv->qp.qpn); + + return 0; +} + +int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlx5e_hwstamp_set(priv, ifr); + case SIOCGHWTSTAMP: + return mlx5e_hwstamp_get(priv, ifr); + default: + return -EOPNOTSUPP; + } +} + +void mlx5i_dev_cleanup(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5i_uninit_underlay_qp(priv); + + /* Delete QPN to net-device mapping from HT */ + mlx5i_pkey_del_qpn(dev, ipriv->qp.qpn); +} + +static int mlx5i_open(struct net_device *netdev) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; + int err; + + mutex_lock(&epriv->state_lock); + + set_bit(MLX5E_STATE_OPENED, &epriv->state); + + err = mlx5i_init_underlay_qp(epriv); + if (err) { + mlx5_core_warn(mdev, "prepare underlay qp state failed, %d\n", err); + goto err_clear_state_opened_flag; + } + + err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn); + if (err) { + mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err); + goto err_reset_qp; + } + + err = mlx5e_open_channels(epriv, &epriv->channels); + if (err) + goto err_remove_fs_underlay_qp; + + mlx5e_refresh_tirs(epriv, false); + mlx5e_activate_priv_channels(epriv); + + mutex_unlock(&epriv->state_lock); + return 0; + +err_remove_fs_underlay_qp: + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); +err_reset_qp: + mlx5i_uninit_underlay_qp(epriv); +err_clear_state_opened_flag: + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + mutex_unlock(&epriv->state_lock); + return err; +} + +static int mlx5i_close(struct net_device *netdev) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; + + /* May already be CLOSED in case a previous configuration operation + * (e.g RX/TX queue size change) that involves close&open failed. + */ + mutex_lock(&epriv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &epriv->state)) + goto unlock; + + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + + netif_carrier_off(epriv->netdev); + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); + mlx5e_deactivate_priv_channels(epriv); + mlx5e_close_channels(&epriv->channels); + mlx5i_uninit_underlay_qp(epriv); +unlock: + mutex_unlock(&epriv->state_lock); + return 0; +} + +/* IPoIB RDMA netdev callbacks */ +static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca, + union ib_gid *gid, u16 lid, int set_qkey, + u32 qkey) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = epriv->mdev; + struct mlx5i_priv *ipriv = epriv->ppriv; + int err; + + mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw); + err = mlx5_core_attach_mcg(mdev, gid, ipriv->qp.qpn); + if (err) + mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n", + ipriv->qp.qpn, gid->raw); + + if (set_qkey) { + mlx5_core_dbg(mdev, "%s setting qkey 0x%x\n", + netdev->name, qkey); + ipriv->qkey = qkey; + } + + return err; +} + +static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca, + union ib_gid *gid, u16 lid) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = epriv->mdev; + struct mlx5i_priv *ipriv = epriv->ppriv; + int err; + + mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw); + + err = mlx5_core_detach_mcg(mdev, gid, ipriv->qp.qpn); + if (err) + mlx5_core_dbg(mdev, "failed detaching QPN 0x%x, MGID %pI6\n", + ipriv->qp.qpn, gid->raw); + + return err; +} + +static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb, + struct ib_ah *address, u32 dqpn) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(dev); + struct mlx5e_txqsq *sq = epriv->txq2sq[skb_get_queue_mapping(skb)]; + struct mlx5_ib_ah *mah = to_mah(address); + struct mlx5i_priv *ipriv = epriv->ppriv; + + return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey); +} + +static void mlx5i_set_pkey_index(struct net_device *netdev, int id) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + + ipriv->pkey_index = (u16)id; +} + +static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev) +{ + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) + return -EOPNOTSUPP; + + if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) { + mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static void mlx5_rdma_netdev_free(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = priv->ppriv; + const struct mlx5e_profile *profile = priv->profile; + + mlx5e_detach_netdev(priv); + profile->cleanup(priv); + destroy_workqueue(priv->wq); + + if (!ipriv->sub_interface) { + mlx5i_pkey_qpn_ht_cleanup(netdev); + mlx5e_destroy_mdev_resources(priv->mdev); + } +} + +struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, + struct ib_device *ibdev, + const char *name, + void (*setup)(struct net_device *)) +{ + const struct mlx5e_profile *profile; + struct net_device *netdev; + struct mlx5i_priv *ipriv; + struct mlx5e_priv *epriv; + struct rdma_netdev *rn; + bool sub_interface; + int nch; + int err; + + if (mlx5i_check_required_hca_cap(mdev)) { + mlx5_core_warn(mdev, "Accelerated mode is not supported\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + /* TODO: Need to find a better way to check if child device*/ + sub_interface = (mdev->mlx5e_res.pdn != 0); + + if (sub_interface) + profile = mlx5i_pkey_get_profile(); + else + profile = &mlx5i_nic_profile; + + nch = profile->max_nch(mdev); + + netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv), + name, NET_NAME_UNKNOWN, + setup, + nch * MLX5E_MAX_NUM_TC, + nch); + if (!netdev) { + mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n"); + return NULL; + } + + ipriv = netdev_priv(netdev); + epriv = mlx5i_epriv(netdev); + + epriv->wq = create_singlethread_workqueue("mlx5i"); + if (!epriv->wq) + goto err_free_netdev; + + ipriv->sub_interface = sub_interface; + if (!ipriv->sub_interface) { + err = mlx5i_pkey_qpn_ht_init(netdev); + if (err) { + mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n"); + goto destroy_wq; + } + + /* This should only be called once per mdev */ + err = mlx5e_create_mdev_resources(mdev); + if (err) + goto destroy_ht; + } + + profile->init(mdev, netdev, profile, ipriv); + + err = mlx5e_attach_netdev(epriv); + if (err) + goto detach; + netif_carrier_off(netdev); + + /* set rdma_netdev func pointers */ + rn = &ipriv->rn; + rn->hca = ibdev; + rn->send = mlx5i_xmit; + rn->attach_mcast = mlx5i_attach_mcast; + rn->detach_mcast = mlx5i_detach_mcast; + rn->set_id = mlx5i_set_pkey_index; + + netdev->priv_destructor = mlx5_rdma_netdev_free; + netdev->needs_free_netdev = 1; + + return netdev; + +detach: + profile->cleanup(epriv); + if (ipriv->sub_interface) + return NULL; + mlx5e_destroy_mdev_resources(mdev); +destroy_ht: + mlx5i_pkey_qpn_ht_cleanup(netdev); +destroy_wq: + destroy_workqueue(epriv->wq); +err_free_netdev: + free_netdev(netdev); + + return NULL; +} +EXPORT_SYMBOL(mlx5_rdma_netdev_alloc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h new file mode 100644 index 000000000..0982c579e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_IPOB_H__ +#define __MLX5E_IPOB_H__ + +#ifdef CONFIG_MLX5_CORE_IPOIB + +#include <linux/mlx5/fs.h> +#include "en.h" + +#define MLX5I_MAX_NUM_TC 1 + +extern const struct ethtool_ops mlx5i_ethtool_ops; +extern const struct ethtool_ops mlx5i_pkey_ethtool_ops; + +#define MLX5_IB_GRH_BYTES 40 +#define MLX5_IPOIB_ENCAP_LEN 4 +#define MLX5_IPOIB_PSEUDO_LEN 20 +#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN) + +/* ipoib rdma netdev's private data structure */ +struct mlx5i_priv { + struct rdma_netdev rn; /* keep this first */ + struct mlx5_core_qp qp; + bool sub_interface; + u32 qkey; + u16 pkey_index; + struct mlx5i_pkey_qpn_ht *qpn_htbl; + char *mlx5e_priv[0]; +}; + +/* Underlay QP create/destroy functions */ +int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); +void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); + +/* Underlay QP state modification init/uninit functions */ +int mlx5i_init_underlay_qp(struct mlx5e_priv *priv); +void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv); + +/* Allocate/Free underlay QPN to net-device hash table */ +int mlx5i_pkey_qpn_ht_init(struct net_device *netdev); +void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev); + +/* Add/Remove an underlay QPN to net-device mapping to/from the hash table */ +int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn); +int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn); + +/* Get the net-device corresponding to the given underlay QPN */ +struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn); + +/* Shared ndo functions */ +int mlx5i_dev_init(struct net_device *dev); +void mlx5i_dev_cleanup(struct net_device *dev); +int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); + +/* Parent profile functions */ +void mlx5i_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv); + +/* Get child interface nic profile */ +const struct mlx5e_profile *mlx5i_pkey_get_profile(void); + +/* Extract mlx5e_priv from IPoIB netdev */ +#define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv)) + +struct mlx5_wqe_eth_pad { + u8 rsvd0[16]; +}; + +struct mlx5i_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_datagram_seg datagram; + struct mlx5_wqe_eth_pad pad; + struct mlx5_wqe_eth_seg eth; + struct mlx5_wqe_data_seg data[0]; +}; + +static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq, + struct mlx5i_tx_wqe **wqe, + u16 pi) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + + *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memset(*wqe, 0, sizeof(**wqe)); +} + +netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_av *av, u32 dqpn, u32 dqkey); +void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +#endif /* CONFIG_MLX5_CORE_IPOIB */ +#endif /* __MLX5E_IPOB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c new file mode 100644 index 000000000..54a188f41 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/hash.h> +#include "ipoib.h" + +#define MLX5I_MAX_LOG_PKEY_SUP 7 + +struct qpn_to_netdev { + struct net_device *netdev; + struct hlist_node hlist; + u32 underlay_qpn; +}; + +struct mlx5i_pkey_qpn_ht { + struct hlist_head buckets[1 << MLX5I_MAX_LOG_PKEY_SUP]; + spinlock_t ht_lock; /* Synchronise with NAPI */ +}; + +int mlx5i_pkey_qpn_ht_init(struct net_device *netdev) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct mlx5i_pkey_qpn_ht *qpn_htbl; + + qpn_htbl = kzalloc(sizeof(*qpn_htbl), GFP_KERNEL); + if (!qpn_htbl) + return -ENOMEM; + + ipriv->qpn_htbl = qpn_htbl; + spin_lock_init(&qpn_htbl->ht_lock); + + return 0; +} + +void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + + kfree(ipriv->qpn_htbl); +} + +static struct qpn_to_netdev *mlx5i_find_qpn_to_netdev_node(struct hlist_head *buckets, + u32 qpn) +{ + struct hlist_head *h = &buckets[hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP)]; + struct qpn_to_netdev *node; + + hlist_for_each_entry(node, h, hlist) { + if (node->underlay_qpn == qpn) + return node; + } + + return NULL; +} + +int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl; + u8 key = hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP); + struct qpn_to_netdev *new_node; + + new_node = kzalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; + + new_node->netdev = netdev; + new_node->underlay_qpn = qpn; + spin_lock_bh(&ht->ht_lock); + hlist_add_head(&new_node->hlist, &ht->buckets[key]); + spin_unlock_bh(&ht->ht_lock); + + return 0; +} + +int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl; + struct qpn_to_netdev *node; + + node = mlx5i_find_qpn_to_netdev_node(ht->buckets, qpn); + if (!node) { + mlx5_core_warn(epriv->mdev, "QPN to netdev delete from HT failed\n"); + return -EINVAL; + } + + spin_lock_bh(&ht->ht_lock); + hlist_del_init(&node->hlist); + spin_unlock_bh(&ht->ht_lock); + kfree(node); + + return 0; +} + +struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct qpn_to_netdev *node; + + node = mlx5i_find_qpn_to_netdev_node(ipriv->qpn_htbl->buckets, qpn); + if (!node) + return NULL; + + return node->netdev; +} + +static int mlx5i_pkey_open(struct net_device *netdev); +static int mlx5i_pkey_close(struct net_device *netdev); +static int mlx5i_pkey_dev_init(struct net_device *dev); +static void mlx5i_pkey_dev_cleanup(struct net_device *netdev); +static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu); +static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); + +static const struct net_device_ops mlx5i_pkey_netdev_ops = { + .ndo_open = mlx5i_pkey_open, + .ndo_stop = mlx5i_pkey_close, + .ndo_init = mlx5i_pkey_dev_init, + .ndo_uninit = mlx5i_pkey_dev_cleanup, + .ndo_change_mtu = mlx5i_pkey_change_mtu, + .ndo_do_ioctl = mlx5i_pkey_ioctl, +}; + +/* Child NDOs */ +static int mlx5i_pkey_dev_init(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + int parent_ifindex; + + ipriv = priv->ppriv; + + /* Get QPN to netdevice hash table from parent */ + parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev); + parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex); + if (!parent_dev) { + mlx5_core_warn(priv->mdev, "failed to get parent device\n"); + return -EINVAL; + } + + parent_ipriv = netdev_priv(parent_dev); + ipriv->qpn_htbl = parent_ipriv->qpn_htbl; + dev_put(parent_dev); + + return mlx5i_dev_init(dev); +} + +static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + return mlx5i_ioctl(dev, ifr, cmd); +} + +static void mlx5i_pkey_dev_cleanup(struct net_device *netdev) +{ + return mlx5i_dev_cleanup(netdev); +} + +static int mlx5i_pkey_open(struct net_device *netdev) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; + int err; + + mutex_lock(&epriv->state_lock); + + set_bit(MLX5E_STATE_OPENED, &epriv->state); + + err = mlx5i_init_underlay_qp(epriv); + if (err) { + mlx5_core_warn(mdev, "prepare child underlay qp state failed, %d\n", err); + goto err_release_lock; + } + + err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn); + if (err) { + mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err); + goto err_unint_underlay_qp; + } + + err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]); + if (err) { + mlx5_core_warn(mdev, "create child tis failed, %d\n", err); + goto err_remove_rx_uderlay_qp; + } + + err = mlx5e_open_channels(epriv, &epriv->channels); + if (err) { + mlx5_core_warn(mdev, "opening child channels failed, %d\n", err); + goto err_clear_state_opened_flag; + } + mlx5e_refresh_tirs(epriv, false); + mlx5e_activate_priv_channels(epriv); + mutex_unlock(&epriv->state_lock); + + return 0; + +err_clear_state_opened_flag: + mlx5e_destroy_tis(mdev, epriv->tisn[0]); +err_remove_rx_uderlay_qp: + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); +err_unint_underlay_qp: + mlx5i_uninit_underlay_qp(epriv); +err_release_lock: + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + mutex_unlock(&epriv->state_lock); + return err; +} + +static int mlx5i_pkey_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + netif_carrier_off(priv->netdev); + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); + mlx5i_uninit_underlay_qp(priv); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + mlx5e_destroy_tis(mdev, priv->tisn[0]); +unlock: + mutex_unlock(&priv->state_lock); + return 0; +} + +static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + mutex_lock(&priv->state_lock); + netdev->mtu = new_mtu; + mutex_unlock(&priv->state_lock); + + return 0; +} + +/* Called directly after IPoIB netdevice was created to initialize SW structs */ +static void mlx5i_pkey_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + mlx5i_init(mdev, netdev, profile, ppriv); + + /* Override parent ndo */ + netdev->netdev_ops = &mlx5i_pkey_netdev_ops; + + /* Set child limited ethtool support */ + netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops; + + /* Use dummy rqs */ + priv->channels.params.log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; +} + +/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ +static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv) +{ + /* Do nothing .. */ +} + +static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + int err; + + err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp); + if (err) { + mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err); + return err; + } + + return 0; +} + +static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); +} + +static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv) +{ + /* Since the rx resources are shared between child and parent, the + * parent interface is taking care of rx resource allocation and init + */ + return 0; +} + +static void mlx5i_pkey_cleanup_rx(struct mlx5e_priv *priv) +{ + /* Since the rx resources are shared between child and parent, the + * parent interface is taking care of rx resource free and de-init + */ +} + +static const struct mlx5e_profile mlx5i_pkey_nic_profile = { + .init = mlx5i_pkey_init, + .cleanup = mlx5i_pkey_cleanup, + .init_tx = mlx5i_pkey_init_tx, + .cleanup_tx = mlx5i_pkey_cleanup_tx, + .init_rx = mlx5i_pkey_init_rx, + .cleanup_rx = mlx5i_pkey_cleanup_rx, + .enable = NULL, + .disable = NULL, + .update_stats = NULL, + .max_nch = mlx5e_get_max_num_channels, + .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, + .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ + .max_tc = MLX5I_MAX_NUM_TC, +}; + +const struct mlx5e_profile *mlx5i_pkey_get_profile(void) +{ + return &mlx5i_pkey_nic_profile; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c new file mode 100644 index 000000000..582b2f180 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -0,0 +1,691 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/netdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> +#include "mlx5_core.h" + +enum { + MLX5_LAG_FLAG_BONDED = 1 << 0, +}; + +struct lag_func { + struct mlx5_core_dev *dev; + struct net_device *netdev; +}; + +/* Used for collection of netdev event info. */ +struct lag_tracker { + enum netdev_lag_tx_type tx_type; + struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; + bool is_bonded; +}; + +/* LAG data of a ConnectX card. + * It serves both its phys functions. + */ +struct mlx5_lag { + u8 flags; + u8 v2p_map[MLX5_MAX_PORTS]; + struct lag_func pf[MLX5_MAX_PORTS]; + struct lag_tracker tracker; + struct delayed_work bond_work; + struct notifier_block nb; + + /* Admin state. Allow lag only if allowed is true + * even if network conditions for lag were met + */ + bool allowed; +}; + +/* General purpose, use for short periods of time. + * Beware of lock dependencies (preferably, no locks should be acquired + * under it). + */ +static DEFINE_MUTEX(lag_mutex); + +static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, + u8 remap_port2) +{ + u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_lag_out)] = {0}; + void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); + + MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); + + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1, + u8 remap_port2) +{ + u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(modify_lag_out)] = {0}; + void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); + + MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); + MLX5_SET(modify_lag_in, in, field_select, 0x1); + + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0}; + + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0}; + + MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_cmd_create_vport_lag); + +int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0}; + + MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); + +static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, + bool reset, void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { }; + + MLX5_SET(query_cong_statistics_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_STATISTICS); + MLX5_SET(query_cong_statistics_in, in, clear, reset); + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); +} + +static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev) +{ + return dev->priv.lag; +} + +static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, + struct net_device *ndev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].netdev == ndev) + return i; + + return -1; +} + +static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_BONDED); +} + +static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, + u8 *port1, u8 *port2) +{ + *port1 = 1; + *port2 = 2; + if (!tracker->netdev_state[0].tx_enabled || + !tracker->netdev_state[0].link_up) { + *port1 = 2; + return; + } + + if (!tracker->netdev_state[1].tx_enabled || + !tracker->netdev_state[1].link_up) + *port2 = 1; +} + +static void mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; + + ldev->flags |= MLX5_LAG_FLAG_BONDED; + + mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0], + &ldev->v2p_map[1]); + + err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]); + if (err) + mlx5_core_err(dev0, + "Failed to create LAG (%d)\n", + err); +} + +static void mlx5_deactivate_lag(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; + + ldev->flags &= ~MLX5_LAG_FLAG_BONDED; + + err = mlx5_cmd_destroy_lag(dev0); + if (err) + mlx5_core_err(dev0, + "Failed to destroy LAG (%d)\n", + err); +} + +static void mlx5_do_bond(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + struct mlx5_core_dev *dev1 = ldev->pf[1].dev; + struct lag_tracker tracker; + u8 v2p_port1, v2p_port2; + int i, err; + bool do_bond; + + if (!dev0 || !dev1) + return; + + mutex_lock(&lag_mutex); + tracker = ldev->tracker; + mutex_unlock(&lag_mutex); + + do_bond = tracker.is_bonded && ldev->allowed; + + if (do_bond && !mlx5_lag_is_bonded(ldev)) { + for (i = 0; i < MLX5_MAX_PORTS; i++) + mlx5_remove_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); + + mlx5_activate_lag(ldev, &tracker); + + mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_enable_roce(dev1); + } else if (do_bond && mlx5_lag_is_bonded(ldev)) { + mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1, + &v2p_port2); + + if ((v2p_port1 != ldev->v2p_map[0]) || + (v2p_port2 != ldev->v2p_map[1])) { + ldev->v2p_map[0] = v2p_port1; + ldev->v2p_map[1] = v2p_port2; + + err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); + if (err) + mlx5_core_err(dev0, + "Failed to modify LAG (%d)\n", + err); + } + } else if (!do_bond && mlx5_lag_is_bonded(ldev)) { + mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_disable_roce(dev1); + + mlx5_deactivate_lag(ldev); + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + mlx5_add_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); + } +} + +static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) +{ + schedule_delayed_work(&ldev->bond_work, delay); +} + +static void mlx5_do_bond_work(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, + bond_work); + int status; + + status = mlx5_dev_list_trylock(); + if (!status) { + /* 1 sec delay. */ + mlx5_queue_bond_work(ldev, HZ); + return; + } + + mlx5_do_bond(ldev); + mlx5_dev_list_unlock(); +} + +static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *upper = info->upper_dev, *ndev_tmp; + struct netdev_lag_upper_info *lag_upper_info = NULL; + bool is_bonded; + int bond_status = 0; + int num_slaves = 0; + int idx; + + if (!netif_is_lag_master(upper)) + return 0; + + if (info->linking) + lag_upper_info = info->upper_info; + + /* The event may still be of interest if the slave does not belong to + * us, but is enslaved to a master which has one or more of our netdevs + * as slaves (e.g., if a new slave is added to a master that bonds two + * of our netdevs, we should unbond). + */ + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper, ndev_tmp) { + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); + if (idx > -1) + bond_status |= (1 << idx); + + num_slaves++; + } + rcu_read_unlock(); + + /* None of this lagdev's netdevs are slaves of this master. */ + if (!(bond_status & 0x3)) + return 0; + + if (lag_upper_info) + tracker->tx_type = lag_upper_info->tx_type; + + /* Determine bonding status: + * A device is considered bonded if both its physical ports are slaves + * of the same lag master, and only them. + * Lag mode must be activebackup or hash. + */ + is_bonded = (num_slaves == MLX5_MAX_PORTS) && + (bond_status == 0x3) && + ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) || + (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)); + + if (tracker->is_bonded != is_bonded) { + tracker->is_bonded = is_bonded; + return 1; + } + + return 0; +} + +static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev, + struct netdev_notifier_changelowerstate_info *info) +{ + struct netdev_lag_lower_state_info *lag_lower_info; + int idx; + + if (!netif_is_lag_port(ndev)) + return 0; + + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); + if (idx == -1) + return 0; + + /* This information is used to determine virtual to physical + * port mapping. + */ + lag_lower_info = info->lower_state_info; + if (!lag_lower_info) + return 0; + + tracker->netdev_state[idx] = *lag_lower_info; + + return 1; +} + +static int mlx5_lag_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct lag_tracker tracker; + struct mlx5_lag *ldev; + int changed = 0; + + if (!net_eq(dev_net(ndev), &init_net)) + return NOTIFY_DONE; + + if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE)) + return NOTIFY_DONE; + + ldev = container_of(this, struct mlx5_lag, nb); + tracker = ldev->tracker; + + switch (event) { + case NETDEV_CHANGEUPPER: + changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev, + ptr); + break; + case NETDEV_CHANGELOWERSTATE: + changed = mlx5_handle_changelowerstate_event(ldev, &tracker, + ndev, ptr); + break; + } + + mutex_lock(&lag_mutex); + ldev->tracker = tracker; + mutex_unlock(&lag_mutex); + + if (changed) + mlx5_queue_bond_work(ldev, 0); + + return NOTIFY_DONE; +} + +static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) +{ + if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) || + (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev))) + return false; + else + return true; +} + +static struct mlx5_lag *mlx5_lag_dev_alloc(void) +{ + struct mlx5_lag *ldev; + + ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); + if (!ldev) + return NULL; + + INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); + ldev->allowed = mlx5_lag_check_prereq(ldev); + + return ldev; +} + +static void mlx5_lag_dev_free(struct mlx5_lag *ldev) +{ + kfree(ldev); +} + +static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev, + struct net_device *netdev) +{ + unsigned int fn = PCI_FUNC(dev->pdev->devfn); + + if (fn >= MLX5_MAX_PORTS) + return; + + mutex_lock(&lag_mutex); + ldev->pf[fn].dev = dev; + ldev->pf[fn].netdev = netdev; + ldev->tracker.netdev_state[fn].link_up = 0; + ldev->tracker.netdev_state[fn].tx_enabled = 0; + + ldev->allowed = mlx5_lag_check_prereq(ldev); + dev->priv.lag = ldev; + + mutex_unlock(&lag_mutex); +} + +static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev == dev) + break; + + if (i == MLX5_MAX_PORTS) + return; + + mutex_lock(&lag_mutex); + memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); + + dev->priv.lag = NULL; + ldev->allowed = mlx5_lag_check_prereq(ldev); + mutex_unlock(&lag_mutex); +} + +/* Must be called with intf_mutex held */ +void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) +{ + struct mlx5_lag *ldev = NULL; + struct mlx5_core_dev *tmp_dev; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)) + return; + + tmp_dev = mlx5_get_next_phys_dev(dev); + if (tmp_dev) + ldev = tmp_dev->priv.lag; + + if (!ldev) { + ldev = mlx5_lag_dev_alloc(); + if (!ldev) { + mlx5_core_err(dev, "Failed to alloc lag dev\n"); + return; + } + } + + mlx5_lag_dev_add_pf(ldev, dev, netdev); + + if (!ldev->nb.notifier_call) { + ldev->nb.notifier_call = mlx5_lag_netdev_event; + if (register_netdevice_notifier(&ldev->nb)) { + ldev->nb.notifier_call = NULL; + mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); + } + } +} + +/* Must be called with intf_mutex held */ +void mlx5_lag_remove(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + int i; + + ldev = mlx5_lag_dev_get(dev); + if (!ldev) + return; + + if (mlx5_lag_is_bonded(ldev)) + mlx5_deactivate_lag(ldev); + + mlx5_lag_dev_remove_pf(ldev, dev); + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + break; + + if (i == MLX5_MAX_PORTS) { + if (ldev->nb.notifier_call) + unregister_netdevice_notifier(&ldev->nb); + cancel_delayed_work_sync(&ldev->bond_work); + mlx5_lag_dev_free(ldev); + } +} + +bool mlx5_lag_is_active(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + res = ldev && mlx5_lag_is_bonded(ldev); + mutex_unlock(&lag_mutex); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_active); + +static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow) +{ + struct mlx5_lag *ldev; + int ret = 0; + bool lag_active; + + mlx5_dev_list_lock(); + + ldev = mlx5_lag_dev_get(dev); + if (!ldev) { + ret = -ENODEV; + goto unlock; + } + lag_active = mlx5_lag_is_bonded(ldev); + if (!mlx5_lag_check_prereq(ldev) && allow) { + ret = -EINVAL; + goto unlock; + } + if (ldev->allowed == allow) + goto unlock; + ldev->allowed = allow; + if ((lag_active && !allow) || allow) + mlx5_do_bond(ldev); +unlock: + mlx5_dev_list_unlock(); + return ret; +} + +int mlx5_lag_forbid(struct mlx5_core_dev *dev) +{ + return mlx5_lag_set_state(dev, false); +} + +int mlx5_lag_allow(struct mlx5_core_dev *dev) +{ + return mlx5_lag_set_state(dev, true); +} + +struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) +{ + struct net_device *ndev = NULL; + struct mlx5_lag *ldev; + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + + if (!(ldev && mlx5_lag_is_bonded(ldev))) + goto unlock; + + if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { + ndev = ldev->tracker.netdev_state[0].tx_enabled ? + ldev->pf[0].netdev : ldev->pf[1].netdev; + } else { + ndev = ldev->pf[0].netdev; + } + if (ndev) + dev_hold(ndev); + +unlock: + mutex_unlock(&lag_mutex); + + return ndev; +} +EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); + +bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, + priv); + struct mlx5_lag *ldev; + + if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB) + return true; + + ldev = mlx5_lag_dev_get(dev); + if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev) + return true; + + /* If bonded, we do not add an IB device for PF1. */ + return false; +} + +int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + u64 *values, + int num_counters, + size_t *offsets) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); + struct mlx5_core_dev *mdev[MLX5_MAX_PORTS]; + struct mlx5_lag *ldev; + int num_ports; + int ret, i, j; + void *out; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + memset(values, 0, sizeof(*values) * num_counters); + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + if (ldev && mlx5_lag_is_bonded(ldev)) { + num_ports = MLX5_MAX_PORTS; + mdev[0] = ldev->pf[0].dev; + mdev[1] = ldev->pf[1].dev; + } else { + num_ports = 1; + mdev[0] = dev; + } + + for (i = 0; i < num_ports; ++i) { + ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen); + if (ret) + goto unlock; + + for (j = 0; j < num_counters; ++j) + values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); + } + +unlock: + mutex_unlock(&lag_mutex); + kvfree(out); + return ret; +} +EXPORT_SYMBOL(mlx5_lag_query_cong_counters); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile new file mode 100644 index 000000000..d8e17110f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c new file mode 100644 index 000000000..0fd62510f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -0,0 +1,616 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/clocksource.h> +#include <linux/highmem.h> +#include <rdma/mlx5-abi.h> +#include "en.h" +#include "clock.h" + +enum { + MLX5_CYCLES_SHIFT = 23 +}; + +enum { + MLX5_PIN_MODE_IN = 0x0, + MLX5_PIN_MODE_OUT = 0x1, +}; + +enum { + MLX5_OUT_PATTERN_PULSE = 0x0, + MLX5_OUT_PATTERN_PERIODIC = 0x1, +}; + +enum { + MLX5_EVENT_MODE_DISABLE = 0x0, + MLX5_EVENT_MODE_REPETETIVE = 0x1, + MLX5_EVENT_MODE_ONCE_TILL_ARM = 0x2, +}; + +enum { + MLX5_MTPPS_FS_ENABLE = BIT(0x0), + MLX5_MTPPS_FS_PATTERN = BIT(0x2), + MLX5_MTPPS_FS_PIN_MODE = BIT(0x3), + MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4), + MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), + MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), +}; + +static u64 read_internal_timer(const struct cyclecounter *cc) +{ + struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles); + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); + + return mlx5_read_internal_timer(mdev) & cc->mask; +} + +static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) +{ + struct mlx5_ib_clock_info *clock_info = mdev->clock_info; + struct mlx5_clock *clock = &mdev->clock; + u32 sign; + + if (!clock_info) + return; + + sign = smp_load_acquire(&clock_info->sign); + smp_store_mb(clock_info->sign, + sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING); + + clock_info->cycles = clock->tc.cycle_last; + clock_info->mult = clock->cycles.mult; + clock_info->nsec = clock->tc.nsec; + clock_info->frac = clock->tc.frac; + + smp_store_release(&clock_info->sign, + sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2); +} + +static void mlx5_pps_out(struct work_struct *work) +{ + struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps, + out_work); + struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock, + pps_info); + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + unsigned long flags; + int i; + + for (i = 0; i < clock->ptp_info.n_pins; i++) { + u64 tstart; + + write_lock_irqsave(&clock->lock, flags); + tstart = clock->pps_info.start[i]; + clock->pps_info.start[i] = 0; + write_unlock_irqrestore(&clock->lock, flags); + if (!tstart) + continue; + + MLX5_SET(mtpps_reg, in, pin, i); + MLX5_SET64(mtpps_reg, in, time_stamp, tstart); + MLX5_SET(mtpps_reg, in, field_select, MLX5_MTPPS_FS_TIME_STAMP); + mlx5_set_mtpps(mdev, in, sizeof(in)); + } +} + +static void mlx5_timestamp_overflow(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock, + overflow_work); + unsigned long flags; + + write_lock_irqsave(&clock->lock, flags); + timecounter_read(&clock->tc); + mlx5_update_clock_info_page(clock->mdev); + write_unlock_irqrestore(&clock->lock, flags); + schedule_delayed_work(&clock->overflow_work, clock->overflow_period); +} + +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + u64 ns = timespec64_to_ns(ts); + unsigned long flags; + + write_lock_irqsave(&clock->lock, flags); + timecounter_init(&clock->tc, &clock->cycles, ns); + mlx5_update_clock_info_page(clock->mdev); + write_unlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + u64 ns; + unsigned long flags; + + write_lock_irqsave(&clock->lock, flags); + ns = timecounter_read(&clock->tc); + write_unlock_irqrestore(&clock->lock, flags); + + *ts = ns_to_timespec64(ns); + + return 0; +} + +static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + unsigned long flags; + + write_lock_irqsave(&clock->lock, flags); + timecounter_adjtime(&clock->tc, delta); + mlx5_update_clock_info_page(clock->mdev); + write_unlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) +{ + u64 adj; + u32 diff; + unsigned long flags; + int neg_adj = 0; + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + + if (delta < 0) { + neg_adj = 1; + delta = -delta; + } + + adj = clock->nominal_c_mult; + adj *= delta; + diff = div_u64(adj, 1000000000ULL); + + write_lock_irqsave(&clock->lock, flags); + timecounter_read(&clock->tc); + clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff : + clock->nominal_c_mult + diff; + mlx5_update_clock_info_page(clock->mdev); + write_unlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_extts_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev = + container_of(clock, struct mlx5_core_dev, clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u32 field_select = 0; + u8 pin_mode = 0; + u8 pattern = 0; + int pin = -1; + int err = 0; + + if (!MLX5_PPS_CAP(mdev)) + return -EOPNOTSUPP; + + if (rq->extts.index >= clock->ptp_info.n_pins) + return -EINVAL; + + if (on) { + pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index); + if (pin < 0) + return -EBUSY; + pin_mode = MLX5_PIN_MODE_IN; + pattern = !!(rq->extts.flags & PTP_FALLING_EDGE); + field_select = MLX5_MTPPS_FS_PIN_MODE | + MLX5_MTPPS_FS_PATTERN | + MLX5_MTPPS_FS_ENABLE; + } else { + pin = rq->extts.index; + field_select = MLX5_MTPPS_FS_ENABLE; + } + + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET(mtpps_reg, in, field_select, field_select); + + err = mlx5_set_mtpps(mdev, in, sizeof(in)); + if (err) + return err; + + return mlx5_set_mtppse(mdev, pin, 0, + MLX5_EVENT_MODE_REPETETIVE & on); +} + +static int mlx5_perout_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev = + container_of(clock, struct mlx5_core_dev, clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u64 nsec_now, nsec_delta, time_stamp = 0; + u64 cycles_now, cycles_delta; + struct timespec64 ts; + unsigned long flags; + u32 field_select = 0; + u8 pin_mode = 0; + u8 pattern = 0; + int pin = -1; + int err = 0; + s64 ns; + + if (!MLX5_PPS_CAP(mdev)) + return -EOPNOTSUPP; + + if (rq->perout.index >= clock->ptp_info.n_pins) + return -EINVAL; + + if (on) { + pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, + rq->perout.index); + if (pin < 0) + return -EBUSY; + + pin_mode = MLX5_PIN_MODE_OUT; + pattern = MLX5_OUT_PATTERN_PERIODIC; + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + ns = timespec64_to_ns(&ts); + + if ((ns >> 1) != 500000000LL) + return -EINVAL; + + ts.tv_sec = rq->perout.start.sec; + ts.tv_nsec = rq->perout.start.nsec; + ns = timespec64_to_ns(&ts); + cycles_now = mlx5_read_internal_timer(mdev); + write_lock_irqsave(&clock->lock, flags); + nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); + nsec_delta = ns - nsec_now; + cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, + clock->cycles.mult); + write_unlock_irqrestore(&clock->lock, flags); + time_stamp = cycles_now + cycles_delta; + field_select = MLX5_MTPPS_FS_PIN_MODE | + MLX5_MTPPS_FS_PATTERN | + MLX5_MTPPS_FS_ENABLE | + MLX5_MTPPS_FS_TIME_STAMP; + } else { + pin = rq->perout.index; + field_select = MLX5_MTPPS_FS_ENABLE; + } + + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); + MLX5_SET(mtpps_reg, in, field_select, field_select); + + err = mlx5_set_mtpps(mdev, in, sizeof(in)); + if (err) + return err; + + return mlx5_set_mtppse(mdev, pin, 0, + MLX5_EVENT_MODE_REPETETIVE & on); +} + +static int mlx5_pps_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + + clock->pps_info.enabled = !!on; + return 0; +} + +static int mlx5_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + return mlx5_extts_configure(ptp, rq, on); + case PTP_CLK_REQ_PEROUT: + return mlx5_perout_configure(ptp, rq, on); + case PTP_CLK_REQ_PPS: + return mlx5_pps_configure(ptp, rq, on); + default: + return -EOPNOTSUPP; + } + return 0; +} + +enum { + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN = BIT(0), + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT = BIT(1), +}; + +static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + + switch (func) { + case PTP_PF_NONE: + return 0; + case PTP_PF_EXTTS: + return !(clock->pps_info.pin_caps[pin] & + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN); + case PTP_PF_PEROUT: + return !(clock->pps_info.pin_caps[pin] & + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT); + default: + return -EOPNOTSUPP; + } + + return -EOPNOTSUPP; +} + +static const struct ptp_clock_info mlx5_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "mlx5_p2p", + .max_adj = 100000000, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, + .n_pins = 0, + .pps = 0, + .adjfreq = mlx5_ptp_adjfreq, + .adjtime = mlx5_ptp_adjtime, + .gettime64 = mlx5_ptp_gettime, + .settime64 = mlx5_ptp_settime, + .enable = NULL, + .verify = NULL, +}; + +static int mlx5_init_pin_config(struct mlx5_clock *clock) +{ + int i; + + clock->ptp_info.pin_config = + kcalloc(clock->ptp_info.n_pins, + sizeof(*clock->ptp_info.pin_config), + GFP_KERNEL); + if (!clock->ptp_info.pin_config) + return -ENOMEM; + clock->ptp_info.enable = mlx5_ptp_enable; + clock->ptp_info.verify = mlx5_ptp_verify; + clock->ptp_info.pps = 1; + + for (i = 0; i < clock->ptp_info.n_pins; i++) { + snprintf(clock->ptp_info.pin_config[i].name, + sizeof(clock->ptp_info.pin_config[i].name), + "mlx5_pps%d", i); + clock->ptp_info.pin_config[i].index = i; + clock->ptp_info.pin_config[i].func = PTP_PF_NONE; + clock->ptp_info.pin_config[i].chan = i; + } + + return 0; +} + +static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + mlx5_query_mtpps(mdev, out, sizeof(out)); + + clock->ptp_info.n_pins = MLX5_GET(mtpps_reg, out, + cap_number_of_pps_pins); + clock->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_in_pins); + clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_out_pins); + + clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); + clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); + clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); + clock->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); + clock->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); + clock->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); + clock->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); + clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); +} + +void mlx5_pps_event(struct mlx5_core_dev *mdev, + struct mlx5_eqe *eqe) +{ + struct mlx5_clock *clock = &mdev->clock; + struct ptp_clock_event ptp_event; + struct timespec64 ts; + u64 nsec_now, nsec_delta; + u64 cycles_now, cycles_delta; + int pin = eqe->data.pps.pin; + s64 ns; + unsigned long flags; + + switch (clock->ptp_info.pin_config[pin].func) { + case PTP_PF_EXTTS: + ptp_event.index = pin; + ptp_event.timestamp = + mlx5_timecounter_cyc2time(clock, + be64_to_cpu(eqe->data.pps.time_stamp)); + if (clock->pps_info.enabled) { + ptp_event.type = PTP_CLOCK_PPSUSR; + ptp_event.pps_times.ts_real = + ns_to_timespec64(ptp_event.timestamp); + } else { + ptp_event.type = PTP_CLOCK_EXTTS; + } + ptp_clock_event(clock->ptp, &ptp_event); + break; + case PTP_PF_PEROUT: + mlx5_ptp_gettime(&clock->ptp_info, &ts); + cycles_now = mlx5_read_internal_timer(mdev); + ts.tv_sec += 1; + ts.tv_nsec = 0; + ns = timespec64_to_ns(&ts); + write_lock_irqsave(&clock->lock, flags); + nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); + nsec_delta = ns - nsec_now; + cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, + clock->cycles.mult); + clock->pps_info.start[pin] = cycles_now + cycles_delta; + schedule_work(&clock->pps_info.out_work); + write_unlock_irqrestore(&clock->lock, flags); + break; + default: + mlx5_core_err(mdev, " Unhandled event\n"); + } +} + +void mlx5_init_clock(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + u64 overflow_cycles; + u64 ns; + u64 frac = 0; + u32 dev_freq; + + dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz); + if (!dev_freq) { + mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); + return; + } + rwlock_init(&clock->lock); + clock->cycles.read = read_internal_timer; + clock->cycles.shift = MLX5_CYCLES_SHIFT; + clock->cycles.mult = clocksource_khz2mult(dev_freq, + clock->cycles.shift); + clock->nominal_c_mult = clock->cycles.mult; + clock->cycles.mask = CLOCKSOURCE_MASK(41); + clock->mdev = mdev; + + timecounter_init(&clock->tc, &clock->cycles, + ktime_to_ns(ktime_get_real())); + + /* Calculate period in seconds to call the overflow watchdog - to make + * sure counter is checked at least twice every wrap around. + * The period is calculated as the minimum between max HW cycles count + * (The clock source mask) and max amount of cycles that can be + * multiplied by clock multiplier where the result doesn't exceed + * 64bits. + */ + overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult); + overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3)); + + ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles, + frac, &frac); + do_div(ns, NSEC_PER_SEC / HZ); + clock->overflow_period = ns; + + mdev->clock_info_page = alloc_page(GFP_KERNEL); + if (mdev->clock_info_page) { + mdev->clock_info = kmap(mdev->clock_info_page); + if (!mdev->clock_info) { + __free_page(mdev->clock_info_page); + mlx5_core_warn(mdev, "failed to map clock page\n"); + } else { + mdev->clock_info->sign = 0; + mdev->clock_info->nsec = clock->tc.nsec; + mdev->clock_info->cycles = clock->tc.cycle_last; + mdev->clock_info->mask = clock->cycles.mask; + mdev->clock_info->mult = clock->nominal_c_mult; + mdev->clock_info->shift = clock->cycles.shift; + mdev->clock_info->frac = clock->tc.frac; + mdev->clock_info->overflow_period = + clock->overflow_period; + } + } + + INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); + INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow); + if (clock->overflow_period) + schedule_delayed_work(&clock->overflow_work, 0); + else + mlx5_core_warn(mdev, "invalid overflow period, overflow_work is not scheduled\n"); + + /* Configure the PHC */ + clock->ptp_info = mlx5_ptp_clock_info; + + /* Initialize 1PPS data structures */ + if (MLX5_PPS_CAP(mdev)) + mlx5_get_pps_caps(mdev); + if (clock->ptp_info.n_pins) + mlx5_init_pin_config(clock); + + clock->ptp = ptp_clock_register(&clock->ptp_info, + &mdev->pdev->dev); + if (IS_ERR(clock->ptp)) { + mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n", + PTR_ERR(clock->ptp)); + clock->ptp = NULL; + } +} + +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + if (clock->ptp) { + ptp_clock_unregister(clock->ptp); + clock->ptp = NULL; + } + + cancel_work_sync(&clock->pps_info.out_work); + cancel_delayed_work_sync(&clock->overflow_work); + + if (mdev->clock_info) { + kunmap(mdev->clock_info_page); + __free_page(mdev->clock_info_page); + mdev->clock_info = NULL; + } + + kfree(clock->ptp_info.pin_config); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h new file mode 100644 index 000000000..02e2e4575 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_CLOCK_H__ +#define __LIB_CLOCK_H__ + +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) +void mlx5_init_clock(struct mlx5_core_dev *mdev); +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); +void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); + +static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) +{ + return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1; +} + +static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + u64 nsec; + + read_lock(&clock->lock); + nsec = timecounter_cyc2time(&clock->tc, timestamp); + read_unlock(&clock->lock); + + return ns_to_ktime(nsec); +} + +#else +static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {} +static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {} +static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {} + +static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) +{ + return -1; +} + +static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + return 0; +} +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c new file mode 100644 index 000000000..7722a3f9b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/etherdevice.h> +#include <linux/idr.h> +#include "mlx5_core.h" +#include "lib/mlx5.h" + +void mlx5_init_reserved_gids(struct mlx5_core_dev *dev) +{ + unsigned int tblsz = MLX5_CAP_ROCE(dev, roce_address_table_size); + + ida_init(&dev->roce.reserved_gids.ida); + dev->roce.reserved_gids.start = tblsz; + dev->roce.reserved_gids.count = 0; +} + +void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev) +{ + WARN_ON(!ida_is_empty(&dev->roce.reserved_gids.ida)); + dev->roce.reserved_gids.start = 0; + dev->roce.reserved_gids.count = 0; + ida_destroy(&dev->roce.reserved_gids.ida); +} + +int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count) +{ + if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n"); + return -EPERM; + } + if (dev->roce.reserved_gids.start < count) { + mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n", + count); + return -ENOMEM; + } + if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) { + mlx5_core_warn(dev, "Unable to reserve %d more GIDs\n", count); + return -ENOMEM; + } + + dev->roce.reserved_gids.start -= count; + dev->roce.reserved_gids.count += count; + mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n", + dev->roce.reserved_gids.count, + dev->roce.reserved_gids.start); + return 0; +} + +void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count) +{ + WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up"); + WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved", + count, dev->roce.reserved_gids.count); + + dev->roce.reserved_gids.start += count; + dev->roce.reserved_gids.count -= count; + mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n", + dev->roce.reserved_gids.count, + dev->roce.reserved_gids.start); +} + +int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index) +{ + int end = dev->roce.reserved_gids.start + + dev->roce.reserved_gids.count; + int index = 0; + + index = ida_simple_get(&dev->roce.reserved_gids.ida, + dev->roce.reserved_gids.start, end, + GFP_KERNEL); + if (index < 0) + return index; + + mlx5_core_dbg(dev, "Allocating reserved GID %u\n", index); + *gid_index = index; + return 0; +} + +void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index) +{ + mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index); + ida_simple_remove(&dev->roce.reserved_gids.ida, gid_index); +} + +unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev) +{ + return dev->roce.reserved_gids.count; +} +EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count); + +int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, + u8 roce_version, u8 roce_l3_type, const u8 *gid, + const u8 *mac, bool vlan, u16 vlan_id, u8 port_num) +{ +#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) + u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0}; + void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); + char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr, + source_l3_address); + void *addr_mac = MLX5_ADDR_OF(roce_addr_layout, in_addr, + source_mac_47_32); + int gidsz = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address); + + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -EINVAL; + + if (gid) { + if (vlan) { + MLX5_SET_RA(in_addr, vlan_valid, 1); + MLX5_SET_RA(in_addr, vlan_id, vlan_id); + } + + ether_addr_copy(addr_mac, mac); + MLX5_SET_RA(in_addr, roce_version, roce_version); + MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type); + memcpy(addr_l3_addr, gid, gidsz); + } + + if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0) + MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num); + + MLX5_SET(set_roce_address_in, in, roce_address_index, index); + MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_roce_gid_set); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h new file mode 100644 index 000000000..7550b1cc8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_MLX5_H__ +#define __LIB_MLX5_H__ + +void mlx5_init_reserved_gids(struct mlx5_core_dev *dev); +void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev); +int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); +void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); +int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); +void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c new file mode 100644 index 000000000..98359559c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/eswitch.h> +#include "mlx5_core.h" +#include "lib/mpfs.h" + +/* HW L2 Table (MPFS) management */ +static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0}; + u8 *in_mac_addr; + + MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0}; + + MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +/* UC L2 table hash node */ +struct l2table_node { + struct l2addr_node node; + u32 index; /* index in HW l2 table */ +}; + +struct mlx5_mpfs { + struct hlist_head hash[MLX5_L2_ADDR_HASH_SIZE]; + struct mutex lock; /* Synchronize l2 table access */ + u32 size; + unsigned long *bitmap; +}; + +static int alloc_l2table_index(struct mlx5_mpfs *l2table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2table->bitmap, l2table->size); + if (*ix >= l2table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2table->bitmap); + + return err; +} + +static void free_l2table_index(struct mlx5_mpfs *l2table, u32 ix) +{ + __clear_bit(ix, l2table->bitmap); +} + +int mlx5_mpfs_init(struct mlx5_core_dev *dev) +{ + int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + struct mlx5_mpfs *mpfs; + + if (!MLX5_ESWITCH_MANAGER(dev)) + return 0; + + mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL); + if (!mpfs) + return -ENOMEM; + + mutex_init(&mpfs->lock); + mpfs->size = l2table_size; + mpfs->bitmap = kcalloc(BITS_TO_LONGS(l2table_size), + sizeof(uintptr_t), GFP_KERNEL); + if (!mpfs->bitmap) { + kfree(mpfs); + return -ENOMEM; + } + + dev->priv.mpfs = mpfs; + return 0; +} + +void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + + if (!MLX5_ESWITCH_MANAGER(dev)) + return; + + WARN_ON(!hlist_empty(mpfs->hash)); + kfree(mpfs->bitmap); + kfree(mpfs); +} + +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + u32 index; + int err; + + if (!MLX5_ESWITCH_MANAGER(dev)) + return 0; + + mutex_lock(&mpfs->lock); + + l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); + if (l2addr) { + err = -EEXIST; + goto abort; + } + + err = alloc_l2table_index(mpfs, &index); + if (err) + goto abort; + + l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); + if (!l2addr) { + free_l2table_index(mpfs, index); + err = -ENOMEM; + goto abort; + } + + l2addr->index = index; + err = set_l2table_entry_cmd(dev, index, mac); + if (err) { + l2addr_hash_del(l2addr); + free_l2table_index(mpfs, index); + } + + mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); +abort: + mutex_unlock(&mpfs->lock); + return err; +} + +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + int err = 0; + u32 index; + + if (!MLX5_ESWITCH_MANAGER(dev)) + return 0; + + mutex_lock(&mpfs->lock); + + l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); + if (!l2addr) { + err = -ENOENT; + goto unlock; + } + + index = l2addr->index; + del_l2table_entry_cmd(dev, index); + l2addr_hash_del(l2addr); + free_l2table_index(mpfs, index); + mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index); +unlock: + mutex_unlock(&mpfs->lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h new file mode 100644 index 000000000..4a7b2c320 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_MPFS_H__ +#define __MLX5_MPFS_H__ + +#include <linux/if_ether.h> +#include <linux/mlx5/device.h> + +/* L2 -mac address based- hash helpers */ +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) (addr[5]) + +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; + +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist) + +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &(hash)[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) + +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\ + } \ + ptr; \ +}) + +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&(ptr)->node.hlist); \ + kfree(ptr); \ +}) + +#ifdef CONFIG_MLX5_MPFS +int mlx5_mpfs_init(struct mlx5_core_dev *dev); +void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac); +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac); +#else /* #ifndef CONFIG_MLX5_MPFS */ +static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {} +static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +#endif +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c new file mode 100644 index 000000000..9a8fd7621 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "vxlan.h" + +struct mlx5_vxlan { + struct mlx5_core_dev *mdev; + spinlock_t lock; /* protect vxlan table */ + /* max_num_ports is usuallly 4, 16 buckets is more than enough */ + DECLARE_HASHTABLE(htable, 4); + int num_ports; + struct mutex sync_lock; /* sync add/del port HW operations */ +}; + +struct mlx5_vxlan_port { + struct hlist_node hlist; + atomic_t refcount; + u16 udp_port; +}; + +static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4; +} + +static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port) +{ + u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0}; + + MLX5_SET(add_vxlan_udp_dport_in, in, opcode, + MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT); + MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port); + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port) +{ + u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0}; + + MLX5_SET(delete_vxlan_udp_dport_in, in, opcode, + MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); + MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port); + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static struct mlx5_vxlan_port* +mlx5_vxlan_lookup_port_locked(struct mlx5_vxlan *vxlan, u16 port) +{ + struct mlx5_vxlan_port *vxlanp; + + hash_for_each_possible(vxlan->htable, vxlanp, hlist, port) { + if (vxlanp->udp_port == port) + return vxlanp; + } + + return NULL; +} + +struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) +{ + struct mlx5_vxlan_port *vxlanp; + + if (!mlx5_vxlan_allowed(vxlan)) + return NULL; + + spin_lock_bh(&vxlan->lock); + vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port); + spin_unlock_bh(&vxlan->lock); + + return vxlanp; +} + +int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) +{ + struct mlx5_vxlan_port *vxlanp; + int ret = -ENOSPC; + + vxlanp = mlx5_vxlan_lookup_port(vxlan, port); + if (vxlanp) { + atomic_inc(&vxlanp->refcount); + return 0; + } + + mutex_lock(&vxlan->sync_lock); + if (vxlan->num_ports >= mlx5_vxlan_max_udp_ports(vxlan->mdev)) { + mlx5_core_info(vxlan->mdev, + "UDP port (%d) not offloaded, max number of UDP ports (%d) are already offloaded\n", + port, mlx5_vxlan_max_udp_ports(vxlan->mdev)); + ret = -ENOSPC; + goto unlock; + } + + ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port); + if (ret) + goto unlock; + + vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL); + if (!vxlanp) { + ret = -ENOMEM; + goto err_delete_port; + } + + vxlanp->udp_port = port; + atomic_set(&vxlanp->refcount, 1); + + spin_lock_bh(&vxlan->lock); + hash_add(vxlan->htable, &vxlanp->hlist, port); + spin_unlock_bh(&vxlan->lock); + + vxlan->num_ports++; + mutex_unlock(&vxlan->sync_lock); + return 0; + +err_delete_port: + mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port); + +unlock: + mutex_unlock(&vxlan->sync_lock); + return ret; +} + +int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) +{ + struct mlx5_vxlan_port *vxlanp; + bool remove = false; + int ret = 0; + + mutex_lock(&vxlan->sync_lock); + + spin_lock_bh(&vxlan->lock); + vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port); + if (!vxlanp) { + ret = -ENOENT; + goto out_unlock; + } + + if (atomic_dec_and_test(&vxlanp->refcount)) { + hash_del(&vxlanp->hlist); + remove = true; + } + +out_unlock: + spin_unlock_bh(&vxlan->lock); + + if (remove) { + mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port); + kfree(vxlanp); + vxlan->num_ports--; + } + + mutex_unlock(&vxlan->sync_lock); + + return ret; +} + +struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev) +{ + struct mlx5_vxlan *vxlan; + + if (!MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || !mlx5_core_is_pf(mdev)) + return ERR_PTR(-ENOTSUPP); + + vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL); + if (!vxlan) + return ERR_PTR(-ENOMEM); + + vxlan->mdev = mdev; + mutex_init(&vxlan->sync_lock); + spin_lock_init(&vxlan->lock); + hash_init(vxlan->htable); + + /* Hardware adds 4789 by default */ + mlx5_vxlan_add_port(vxlan, 4789); + + return vxlan; +} + +void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) +{ + struct mlx5_vxlan_port *vxlanp; + struct hlist_node *tmp; + int bkt; + + if (!mlx5_vxlan_allowed(vxlan)) + return; + + /* Lockless since we are the only hash table consumers*/ + hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) { + hash_del(&vxlanp->hlist); + mlx5_vxlan_core_del_port_cmd(vxlan->mdev, vxlanp->udp_port); + kfree(vxlanp); + } + + kfree(vxlan); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h new file mode 100644 index 000000000..8fb0eb08f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_VXLAN_H__ +#define __MLX5_VXLAN_H__ + +#include <linux/mlx5/driver.h> + +struct mlx5_vxlan; +struct mlx5_vxlan_port; + +static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan) +{ + /* not allowed reason is encoded in vxlan pointer as error, + * on mlx5_vxlan_create + */ + return !IS_ERR_OR_NULL(vxlan); +} + +#if IS_ENABLED(CONFIG_VXLAN) +struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev); +void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan); +int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port); +int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port); +struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port); +#else +static inline struct mlx5_vxlan* +mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-EOPNOTSUPP); } +static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; } +static inline int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; } +static inline int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; } +static inline struct mx5_vxlan_port* +mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) { return NULL; } +#endif + +#endif /* __MLX5_VXLAN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c new file mode 100644 index 000000000..3a3b0005f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port) +{ + int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); + int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); + int err = -ENOMEM; + void *data; + void *resp; + u32 *out; + u32 *in; + + in = kzalloc(inlen, GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); + if (!in || !out) + goto out; + + MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); + MLX5_SET(mad_ifc_in, in, op_mod, opmod); + MLX5_SET(mad_ifc_in, in, port, port); + + data = MLX5_ADDR_OF(mad_ifc_in, in, mad); + memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); + + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + if (err) + goto out; + + resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); + memcpy(outb, resp, + MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); + +out: + kfree(out); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c new file mode 100644 index 000000000..a2b25afa2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -0,0 +1,1721 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/highmem.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/io-mapping.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/srq.h> +#include <linux/debugfs.h> +#include <linux/kmod.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif +#include <linux/version.h> +#include <net/devlink.h> +#include "mlx5_core.h" +#include "fs_core.h" +#include "lib/mpfs.h" +#include "eswitch.h" +#include "lib/mlx5.h" +#include "fpga/core.h" +#include "fpga/ipsec.h" +#include "accel/ipsec.h" +#include "accel/tls.h" +#include "lib/clock.h" +#include "lib/vxlan.h" +#include "diag/fw_tracer.h" + +MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRIVER_VERSION); + +unsigned int mlx5_core_debug_mask; +module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644); +MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); + +#define MLX5_DEFAULT_PROF 2 +static unsigned int prof_sel = MLX5_DEFAULT_PROF; +module_param_named(prof_sel, prof_sel, uint, 0444); +MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); + +static u32 sw_owner_id[4]; + +enum { + MLX5_ATOMIC_REQ_MODE_BE = 0x0, + MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, +}; + +static struct mlx5_profile profile[] = { + [0] = { + .mask = 0, + }, + [1] = { + .mask = MLX5_PROF_MASK_QP_SIZE, + .log_max_qp = 12, + }, + [2] = { + .mask = MLX5_PROF_MASK_QP_SIZE | + MLX5_PROF_MASK_MR_CACHE, + .log_max_qp = 18, + .mr_cache[0] = { + .size = 500, + .limit = 250 + }, + .mr_cache[1] = { + .size = 500, + .limit = 250 + }, + .mr_cache[2] = { + .size = 500, + .limit = 250 + }, + .mr_cache[3] = { + .size = 500, + .limit = 250 + }, + .mr_cache[4] = { + .size = 500, + .limit = 250 + }, + .mr_cache[5] = { + .size = 500, + .limit = 250 + }, + .mr_cache[6] = { + .size = 500, + .limit = 250 + }, + .mr_cache[7] = { + .size = 500, + .limit = 250 + }, + .mr_cache[8] = { + .size = 500, + .limit = 250 + }, + .mr_cache[9] = { + .size = 500, + .limit = 250 + }, + .mr_cache[10] = { + .size = 500, + .limit = 250 + }, + .mr_cache[11] = { + .size = 500, + .limit = 250 + }, + .mr_cache[12] = { + .size = 64, + .limit = 32 + }, + .mr_cache[13] = { + .size = 32, + .limit = 16 + }, + .mr_cache[14] = { + .size = 16, + .limit = 8 + }, + .mr_cache[15] = { + .size = 8, + .limit = 4 + }, + }, +}; + +#define FW_INIT_TIMEOUT_MILI 2000 +#define FW_INIT_WAIT_MS 2 +#define FW_PRE_INIT_TIMEOUT_MILI 10000 + +static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) +{ + unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); + int err = 0; + + while (fw_initializing(dev)) { + if (time_after(jiffies, end)) { + err = -EBUSY; + break; + } + msleep(FW_INIT_WAIT_MS); + } + + return err; +} + +static void mlx5_set_driver_version(struct mlx5_core_dev *dev) +{ + int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in, + driver_version); + u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {0}; + u8 out[MLX5_ST_SZ_BYTES(set_driver_version_out)] = {0}; + int remaining_size = driver_ver_sz; + char *string; + + if (!MLX5_CAP_GEN(dev, driver_version)) + return; + + string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version); + + strncpy(string, "Linux", remaining_size); + + remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); + strncat(string, ",", remaining_size); + + remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); + strncat(string, DRIVER_NAME, remaining_size); + + remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); + strncat(string, ",", remaining_size); + + remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); + + snprintf(string + strlen(string), remaining_size, "%u.%u.%u", + (u8)((LINUX_VERSION_CODE >> 16) & 0xff), (u8)((LINUX_VERSION_CODE >> 8) & 0xff), + (u16)(LINUX_VERSION_CODE & 0xffff)); + + /*Send the command*/ + MLX5_SET(set_driver_version_in, in, opcode, + MLX5_CMD_OP_SET_DRIVER_VERSION); + + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int set_dma_caps(struct pci_dev *pdev) +{ + int err; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) { + dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n"); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n"); + return err; + } + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) { + dev_warn(&pdev->dev, + "Warning: couldn't set 64-bit consistent PCI DMA mask\n"); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, + "Can't set consistent PCI DMA mask, aborting\n"); + return err; + } + } + + dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024); + return err; +} + +static int mlx5_pci_enable_device(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err = 0; + + mutex_lock(&dev->pci_status_mutex); + if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) { + err = pci_enable_device(pdev); + if (!err) + dev->pci_status = MLX5_PCI_STATUS_ENABLED; + } + mutex_unlock(&dev->pci_status_mutex); + + return err; +} + +static void mlx5_pci_disable_device(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + + mutex_lock(&dev->pci_status_mutex); + if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) { + pci_disable_device(pdev); + dev->pci_status = MLX5_PCI_STATUS_DISABLED; + } + mutex_unlock(&dev->pci_status_mutex); +} + +static int request_bar(struct pci_dev *pdev) +{ + int err = 0; + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "Missing registers BAR, aborting\n"); + return -ENODEV; + } + + err = pci_request_regions(pdev, DRIVER_NAME); + if (err) + dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); + + return err; +} + +static void release_bar(struct pci_dev *pdev) +{ + pci_release_regions(pdev); +} + +static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *table = &priv->eq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int nvec; + int err; + + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_EQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_EQ_VEC_COMP_BASE) + return -ENOMEM; + + priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); + if (!priv->irq_info) + return -ENOMEM; + + nvec = pci_alloc_irq_vectors(dev->pdev, + MLX5_EQ_VEC_COMP_BASE + 1, nvec, + PCI_IRQ_MSIX); + if (nvec < 0) { + err = nvec; + goto err_free_irq_info; + } + + table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + + return 0; + +err_free_irq_info: + kfree(priv->irq_info); + return err; +} + +static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + + pci_free_irq_vectors(dev->pdev); + kfree(priv->irq_info); +} + +struct mlx5_reg_host_endianness { + u8 he; + u8 rsvd[15]; +}; + +#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos)) + +enum { + MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) | + MLX5_DEV_CAP_FLAG_DCT, +}; + +static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size) +{ + switch (size) { + case 128: + return 0; + case 256: + return 1; + case 512: + return 2; + case 1024: + return 3; + case 2048: + return 4; + case 4096: + return 5; + default: + mlx5_core_warn(dev, "invalid pkey table size %d\n", size); + return 0; + } +} + +static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, + enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode) +{ + u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; + int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *out, *hca_caps; + u16 opmod = (cap_type << 1) | (cap_mode & 0x01); + int err; + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, opmod); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + if (err) { + mlx5_core_warn(dev, + "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n", + cap_type, cap_mode, err); + goto query_ex; + } + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); + + switch (cap_mode) { + case HCA_CAP_OPMOD_GET_MAX: + memcpy(dev->caps.hca_max[cap_type], hca_caps, + MLX5_UN_SZ_BYTES(hca_cap_union)); + break; + case HCA_CAP_OPMOD_GET_CUR: + memcpy(dev->caps.hca_cur[cap_type], hca_caps, + MLX5_UN_SZ_BYTES(hca_cap_union)); + break; + default: + mlx5_core_warn(dev, + "Tried to query dev cap type(%x) with wrong opmode(%x)\n", + cap_type, cap_mode); + err = -EINVAL; + break; + } +query_ex: + kfree(out); + return err; +} + +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type) +{ + int ret; + + ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR); + if (ret) + return ret; + return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX); +} + +static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod) +{ + u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0}; + + MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1); + return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); +} + +static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) +{ + void *set_ctx; + void *set_hca_cap; + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + int req_endianness; + int err; + + if (MLX5_CAP_GEN(dev, atomic)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); + if (err) + return err; + } else { + return 0; + } + + req_endianness = + MLX5_CAP_ATOMIC(dev, + supported_atomic_req_8B_endianness_mode_1); + + if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS) + return 0; + + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + + /* Set requestor to host endianness */ + MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode, + MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS); + + err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC); + + kfree(set_ctx); + return err; +} + +static int handle_hca_cap(struct mlx5_core_dev *dev) +{ + void *set_ctx = NULL; + struct mlx5_profile *prof = dev->profile; + int err = -ENOMEM; + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *set_hca_cap; + + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + goto query_ex; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); + if (err) + goto query_ex; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, + capability); + memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_GENERAL], + MLX5_ST_SZ_BYTES(cmd_hca_cap)); + + mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", + mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), + 128); + /* we limit the size of the pkey table to 128 entries for now */ + MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, + to_fw_pkey_sz(dev, 128)); + + /* Check log_max_qp from HCA caps to set in current profile */ + if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) { + mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", + profile[prof_sel].log_max_qp, + MLX5_CAP_GEN_MAX(dev, log_max_qp)); + profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + } + if (prof->mask & MLX5_PROF_MASK_QP_SIZE) + MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, + prof->log_max_qp); + + /* disable cmdif checksum */ + MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); + + /* Enable 4K UAR only when HCA supports it and page size is bigger + * than 4K. + */ + if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096) + MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1); + + MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); + + if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte)) + MLX5_SET(cmd_hca_cap, + set_hca_cap, + cache_line_128byte, + cache_line_size() >= 128 ? 1 : 0); + + if (MLX5_CAP_GEN_MAX(dev, dct)) + MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1); + + if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports)) + MLX5_SET(cmd_hca_cap, + set_hca_cap, + num_vhca_ports, + MLX5_CAP_GEN_MAX(dev, num_vhca_ports)); + + err = set_caps(dev, set_ctx, set_sz, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); + +query_ex: + kfree(set_ctx); + return err; +} + +static int set_hca_ctrl(struct mlx5_core_dev *dev) +{ + struct mlx5_reg_host_endianness he_in; + struct mlx5_reg_host_endianness he_out; + int err; + + if (!mlx5_core_is_pf(dev)) + return 0; + + memset(&he_in, 0, sizeof(he_in)); + he_in.he = MLX5_SET_HOST_ENDIANNESS; + err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), + &he_out, sizeof(he_out), + MLX5_REG_HOST_ENDIANNESS, 0, 1); + return err; +} + +static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev) +{ + int ret = 0; + + /* Disable local_lb by default */ + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) + ret = mlx5_nic_vport_update_local_lb(dev, false); + + return ret; +} + +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, func_id); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, func_id); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) +{ + u32 timer_h, timer_h1, timer_l; + + timer_h = ioread32be(&dev->iseg->internal_timer_h); + timer_l = ioread32be(&dev->iseg->internal_timer_l); + timer_h1 = ioread32be(&dev->iseg->internal_timer_h); + if (timer_h != timer_h1) /* wrap around */ + timer_l = ioread32be(&dev->iseg->internal_timer_l); + + return (u64)timer_l | (u64)timer_h1 << 32; +} + +static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int irq = pci_irq_vector(mdev->pdev, vecidx); + + if (!zalloc_cpumask_var(&priv->irq_info[vecidx].mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), + priv->irq_info[vecidx].mask); + + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irq, priv->irq_info[vecidx].mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); + + return 0; +} + +static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + struct mlx5_priv *priv = &mdev->priv; + int irq = pci_irq_vector(mdev->pdev, vecidx); + + irq_set_affinity_hint(irq, NULL); + free_cpumask_var(priv->irq_info[vecidx].mask); +} + +static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) +{ + int err; + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { + err = mlx5_irq_set_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + mlx5_irq_clear_affinity_hint(mdev, i); + + return err; +} + +static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) +{ + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) + mlx5_irq_clear_affinity_hint(mdev, i); +} + +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, + unsigned int *irqn) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq, *n; + int err = -ENOENT; + + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + if (eq->index == vector) { + *eqn = eq->eqn; + *irqn = eq->irqn; + err = 0; + break; + } + } + spin_unlock(&table->lock); + + return err; +} +EXPORT_SYMBOL(mlx5_vector2eqn); + +struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq; + + spin_lock(&table->lock); + list_for_each_entry(eq, &table->comp_eqs_list, list) + if (eq->eqn == eqn) { + spin_unlock(&table->lock); + return eq; + } + + spin_unlock(&table->lock); + + return ERR_PTR(-ENOENT); +} + +static void free_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq, *n; + +#ifdef CONFIG_RFS_ACCEL + if (dev->rmap) { + free_irq_cpu_rmap(dev->rmap); + dev->rmap = NULL; + } +#endif + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + list_del(&eq->list); + spin_unlock(&table->lock); + if (mlx5_destroy_unmap_eq(dev, eq)) + mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n", + eq->eqn); + kfree(eq); + spin_lock(&table->lock); + } + spin_unlock(&table->lock); +} + +static int alloc_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + char name[MLX5_MAX_IRQ_NAME]; + struct mlx5_eq *eq; + int ncomp_vec; + int nent; + int err; + int i; + + INIT_LIST_HEAD(&table->comp_eqs_list); + ncomp_vec = table->num_comp_vectors; + nent = MLX5_COMP_EQ_SIZE; +#ifdef CONFIG_RFS_ACCEL + dev->rmap = alloc_irq_cpu_rmap(ncomp_vec); + if (!dev->rmap) + return -ENOMEM; +#endif + for (i = 0; i < ncomp_vec; i++) { + eq = kzalloc(sizeof(*eq), GFP_KERNEL); + if (!eq) { + err = -ENOMEM; + goto clean; + } + +#ifdef CONFIG_RFS_ACCEL + irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev, + MLX5_EQ_VEC_COMP_BASE + i)); +#endif + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + err = mlx5_create_map_eq(dev, eq, + i + MLX5_EQ_VEC_COMP_BASE, nent, 0, + name, MLX5_EQ_TYPE_COMP); + if (err) { + kfree(eq); + goto clean; + } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); + eq->index = i; + spin_lock(&table->lock); + list_add_tail(&eq->list, &table->comp_eqs_list); + spin_unlock(&table->lock); + } + + return 0; + +clean: + free_comp_eqs(dev); + return err; +} + +static int mlx5_core_set_issi(struct mlx5_core_dev *dev) +{ + u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0}; + u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0}; + u32 sup_issi; + int err; + + MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); + err = mlx5_cmd_exec(dev, query_in, sizeof(query_in), + query_out, sizeof(query_out)); + if (err) { + u32 syndrome; + u8 status; + + mlx5_cmd_mbox_status(query_out, &status, &syndrome); + if (!status || syndrome == MLX5_DRIVER_SYND) { + mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n", + err, status, syndrome); + return err; + } + + mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n"); + dev->issi = 0; + return 0; + } + + sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); + + if (sup_issi & (1 << 1)) { + u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {0}; + u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0}; + + MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI); + MLX5_SET(set_issi_in, set_in, current_issi, 1); + err = mlx5_cmd_exec(dev, set_in, sizeof(set_in), + set_out, sizeof(set_out)); + if (err) { + mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n", + err); + return err; + } + + dev->issi = 1; + + return 0; + } else if (sup_issi & (1 << 0) || !sup_issi) { + return 0; + } + + return -EOPNOTSUPP; +} + +static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +{ + struct pci_dev *pdev = dev->pdev; + int err = 0; + + pci_set_drvdata(dev->pdev, dev); + strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN); + priv->name[MLX5_MAX_NAME_LEN - 1] = 0; + + mutex_init(&priv->pgdir_mutex); + INIT_LIST_HEAD(&priv->pgdir_list); + spin_lock_init(&priv->mkey_lock); + + mutex_init(&priv->alloc_mutex); + + priv->numa_node = dev_to_node(&dev->pdev->dev); + + if (mlx5_debugfs_root) + priv->dbg_root = + debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root); + + err = mlx5_pci_enable_device(dev); + if (err) { + dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n"); + goto err_dbg; + } + + err = request_bar(pdev); + if (err) { + dev_err(&pdev->dev, "error requesting BARs, aborting\n"); + goto err_disable; + } + + pci_set_master(pdev); + + err = set_dma_caps(pdev); + if (err) { + dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n"); + goto err_clr_master; + } + + dev->iseg_base = pci_resource_start(dev->pdev, 0); + dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); + if (!dev->iseg) { + err = -ENOMEM; + dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n"); + goto err_clr_master; + } + + return 0; + +err_clr_master: + pci_clear_master(dev->pdev); + release_bar(dev->pdev); +err_disable: + mlx5_pci_disable_device(dev); + +err_dbg: + debugfs_remove(priv->dbg_root); + return err; +} + +static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +{ + iounmap(dev->iseg); + pci_clear_master(dev->pdev); + release_bar(dev->pdev); + mlx5_pci_disable_device(dev); + debugfs_remove_recursive(priv->dbg_root); +} + +static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + err = mlx5_query_board_id(dev); + if (err) { + dev_err(&pdev->dev, "query board id failed\n"); + goto out; + } + + err = mlx5_eq_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize eq\n"); + goto out; + } + + err = mlx5_cq_debugfs_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize cq debugfs\n"); + goto err_eq_cleanup; + } + + mlx5_init_qp_table(dev); + + mlx5_init_srq_table(dev); + + mlx5_init_mkey_table(dev); + + mlx5_init_reserved_gids(dev); + + mlx5_init_clock(dev); + + dev->vxlan = mlx5_vxlan_create(dev); + + err = mlx5_init_rl_table(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init rate limiting\n"); + goto err_tables_cleanup; + } + + err = mlx5_mpfs_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init l2 table %d\n", err); + goto err_rl_cleanup; + } + + err = mlx5_eswitch_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init eswitch %d\n", err); + goto err_mpfs_cleanup; + } + + err = mlx5_sriov_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init sriov %d\n", err); + goto err_eswitch_cleanup; + } + + err = mlx5_fpga_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init fpga device %d\n", err); + goto err_sriov_cleanup; + } + + dev->tracer = mlx5_fw_tracer_create(dev); + + return 0; + +err_sriov_cleanup: + mlx5_sriov_cleanup(dev); +err_eswitch_cleanup: + mlx5_eswitch_cleanup(dev->priv.eswitch); +err_mpfs_cleanup: + mlx5_mpfs_cleanup(dev); +err_rl_cleanup: + mlx5_cleanup_rl_table(dev); +err_tables_cleanup: + mlx5_vxlan_destroy(dev->vxlan); + mlx5_cleanup_mkey_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cq_debugfs_cleanup(dev); + +err_eq_cleanup: + mlx5_eq_cleanup(dev); + +out: + return err; +} + +static void mlx5_cleanup_once(struct mlx5_core_dev *dev) +{ + mlx5_fw_tracer_destroy(dev->tracer); + mlx5_fpga_cleanup(dev); + mlx5_sriov_cleanup(dev); + mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_mpfs_cleanup(dev); + mlx5_cleanup_rl_table(dev); + mlx5_vxlan_destroy(dev->vxlan); + mlx5_cleanup_clock(dev); + mlx5_cleanup_reserved_gids(dev); + mlx5_cleanup_mkey_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cq_debugfs_cleanup(dev); + mlx5_eq_cleanup(dev); +} + +static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, + bool boot) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + mutex_lock(&dev->intf_state_mutex); + if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n", + __func__); + goto out; + } + + dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev), + fw_rev_min(dev), fw_rev_sub(dev)); + + /* Only PFs hold the relevant PCIe information for this query */ + if (mlx5_core_is_pf(dev)) + pcie_print_link_status(dev->pdev); + + /* on load removing any previous indication of internal error, device is + * up + */ + dev->state = MLX5_DEVICE_STATE_UP; + + /* wait for firmware to accept initialization segments configurations + */ + err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI); + if (err) { + dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n", + FW_PRE_INIT_TIMEOUT_MILI); + goto out_err; + } + + err = mlx5_cmd_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed initializing command interface, aborting\n"); + goto out_err; + } + + err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI); + if (err) { + dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n", + FW_INIT_TIMEOUT_MILI); + goto err_cmd_cleanup; + } + + err = mlx5_core_enable_hca(dev, 0); + if (err) { + dev_err(&pdev->dev, "enable hca failed\n"); + goto err_cmd_cleanup; + } + + err = mlx5_core_set_issi(dev); + if (err) { + dev_err(&pdev->dev, "failed to set issi\n"); + goto err_disable_hca; + } + + err = mlx5_satisfy_startup_pages(dev, 1); + if (err) { + dev_err(&pdev->dev, "failed to allocate boot pages\n"); + goto err_disable_hca; + } + + err = set_hca_ctrl(dev); + if (err) { + dev_err(&pdev->dev, "set_hca_ctrl failed\n"); + goto reclaim_boot_pages; + } + + err = handle_hca_cap(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap failed\n"); + goto reclaim_boot_pages; + } + + err = handle_hca_cap_atomic(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n"); + goto reclaim_boot_pages; + } + + err = mlx5_satisfy_startup_pages(dev, 0); + if (err) { + dev_err(&pdev->dev, "failed to allocate init pages\n"); + goto reclaim_boot_pages; + } + + err = mlx5_pagealloc_start(dev); + if (err) { + dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n"); + goto reclaim_boot_pages; + } + + err = mlx5_cmd_init_hca(dev, sw_owner_id); + if (err) { + dev_err(&pdev->dev, "init hca failed\n"); + goto err_pagealloc_stop; + } + + mlx5_set_driver_version(dev); + + mlx5_start_health_poll(dev); + + err = mlx5_query_hca_caps(dev); + if (err) { + dev_err(&pdev->dev, "query hca failed\n"); + goto err_stop_poll; + } + + if (boot) { + err = mlx5_init_once(dev, priv); + if (err) { + dev_err(&pdev->dev, "sw objs init failed\n"); + goto err_stop_poll; + } + } + + err = mlx5_alloc_irq_vectors(dev); + if (err) { + dev_err(&pdev->dev, "alloc irq vectors failed\n"); + goto err_cleanup_once; + } + + dev->priv.uar = mlx5_get_uars_page(dev); + if (IS_ERR(dev->priv.uar)) { + dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); + err = PTR_ERR(dev->priv.uar); + goto err_disable_msix; + } + + err = mlx5_start_eqs(dev); + if (err) { + dev_err(&pdev->dev, "Failed to start pages and async EQs\n"); + goto err_put_uars; + } + + err = mlx5_fw_tracer_init(dev->tracer); + if (err) { + dev_err(&pdev->dev, "Failed to init FW tracer\n"); + goto err_fw_tracer; + } + + err = alloc_comp_eqs(dev); + if (err) { + dev_err(&pdev->dev, "Failed to alloc completion EQs\n"); + goto err_comp_eqs; + } + + err = mlx5_irq_set_affinity_hints(dev); + if (err) { + dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); + goto err_affinity_hints; + } + + err = mlx5_fpga_device_start(dev); + if (err) { + dev_err(&pdev->dev, "fpga device start failed %d\n", err); + goto err_fpga_start; + } + + err = mlx5_accel_ipsec_init(dev); + if (err) { + dev_err(&pdev->dev, "IPSec device start failed %d\n", err); + goto err_ipsec_start; + } + + err = mlx5_accel_tls_init(dev); + if (err) { + dev_err(&pdev->dev, "TLS device start failed %d\n", err); + goto err_tls_start; + } + + err = mlx5_init_fs(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init flow steering\n"); + goto err_fs; + } + + err = mlx5_core_set_hca_defaults(dev); + if (err) { + dev_err(&pdev->dev, "Failed to set hca defaults\n"); + goto err_fs; + } + + err = mlx5_sriov_attach(dev); + if (err) { + dev_err(&pdev->dev, "sriov init failed %d\n", err); + goto err_sriov; + } + + if (mlx5_device_registered(dev)) { + mlx5_attach_device(dev); + } else { + err = mlx5_register_device(dev); + if (err) { + dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); + goto err_reg_dev; + } + } + + set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); +out: + mutex_unlock(&dev->intf_state_mutex); + + return 0; + +err_reg_dev: + mlx5_sriov_detach(dev); + +err_sriov: + mlx5_cleanup_fs(dev); + +err_fs: + mlx5_accel_tls_cleanup(dev); + +err_tls_start: + mlx5_accel_ipsec_cleanup(dev); + +err_ipsec_start: + mlx5_fpga_device_stop(dev); + +err_fpga_start: + mlx5_irq_clear_affinity_hints(dev); + +err_affinity_hints: + free_comp_eqs(dev); + +err_comp_eqs: + mlx5_fw_tracer_cleanup(dev->tracer); + +err_fw_tracer: + mlx5_stop_eqs(dev); + +err_put_uars: + mlx5_put_uars_page(dev, priv->uar); + +err_disable_msix: + mlx5_free_irq_vectors(dev); + +err_cleanup_once: + if (boot) + mlx5_cleanup_once(dev); + +err_stop_poll: + mlx5_stop_health_poll(dev, boot); + if (mlx5_cmd_teardown_hca(dev)) { + dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); + goto out_err; + } + +err_pagealloc_stop: + mlx5_pagealloc_stop(dev); + +reclaim_boot_pages: + mlx5_reclaim_startup_pages(dev); + +err_disable_hca: + mlx5_core_disable_hca(dev, 0); + +err_cmd_cleanup: + mlx5_cmd_cleanup(dev); + +out_err: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mutex_unlock(&dev->intf_state_mutex); + + return err; +} + +static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, + bool cleanup) +{ + int err = 0; + + if (cleanup) + mlx5_drain_health_recovery(dev); + + mutex_lock(&dev->intf_state_mutex); + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", + __func__); + if (cleanup) + mlx5_cleanup_once(dev); + goto out; + } + + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + + if (mlx5_device_registered(dev)) + mlx5_detach_device(dev); + + mlx5_sriov_detach(dev); + mlx5_cleanup_fs(dev); + mlx5_accel_ipsec_cleanup(dev); + mlx5_accel_tls_cleanup(dev); + mlx5_fpga_device_stop(dev); + mlx5_irq_clear_affinity_hints(dev); + free_comp_eqs(dev); + mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_stop_eqs(dev); + mlx5_put_uars_page(dev, priv->uar); + mlx5_free_irq_vectors(dev); + if (cleanup) + mlx5_cleanup_once(dev); + mlx5_stop_health_poll(dev, cleanup); + err = mlx5_cmd_teardown_hca(dev); + if (err) { + dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); + goto out; + } + mlx5_pagealloc_stop(dev); + mlx5_reclaim_startup_pages(dev); + mlx5_core_disable_hca(dev, 0); + mlx5_cmd_cleanup(dev); + +out: + mutex_unlock(&dev->intf_state_mutex); + return err; +} + +struct mlx5_core_event_handler { + void (*event)(struct mlx5_core_dev *dev, + enum mlx5_dev_event event, + void *data); +}; + +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_ESWITCH + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, + .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, + .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, + .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, + .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, +#endif +}; + +#define MLX5_IB_MOD "mlx5_ib" +static int init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct mlx5_core_dev *dev; + struct devlink *devlink; + struct mlx5_priv *priv; + int err; + + devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev)); + if (!devlink) { + dev_err(&pdev->dev, "kzalloc failed\n"); + return -ENOMEM; + } + + dev = devlink_priv(devlink); + priv = &dev->priv; + priv->pci_dev_data = id->driver_data; + + pci_set_drvdata(pdev, dev); + + dev->pdev = pdev; + dev->event = mlx5_core_event; + dev->profile = &profile[prof_sel]; + + INIT_LIST_HEAD(&priv->ctx_list); + spin_lock_init(&priv->ctx_lock); + mutex_init(&dev->pci_status_mutex); + mutex_init(&dev->intf_state_mutex); + + INIT_LIST_HEAD(&priv->waiting_events_list); + priv->is_accum_events = false; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + err = init_srcu_struct(&priv->pfault_srcu); + if (err) { + dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n", + err); + goto clean_dev; + } +#endif + mutex_init(&priv->bfregs.reg_head.lock); + mutex_init(&priv->bfregs.wc_head.lock); + INIT_LIST_HEAD(&priv->bfregs.reg_head.list); + INIT_LIST_HEAD(&priv->bfregs.wc_head.list); + + err = mlx5_pci_init(dev, priv); + if (err) { + dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err); + goto clean_srcu; + } + + err = mlx5_health_init(dev); + if (err) { + dev_err(&pdev->dev, "mlx5_health_init failed with error code %d\n", err); + goto close_pci; + } + + mlx5_pagealloc_init(dev); + + err = mlx5_load_one(dev, priv, true); + if (err) { + dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); + goto clean_health; + } + + request_module_nowait(MLX5_IB_MOD); + + err = devlink_register(devlink, &pdev->dev); + if (err) + goto clean_load; + + pci_save_state(pdev); + return 0; + +clean_load: + mlx5_unload_one(dev, priv, true); +clean_health: + mlx5_pagealloc_cleanup(dev); + mlx5_health_cleanup(dev); +close_pci: + mlx5_pci_close(dev, priv); +clean_srcu: +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + cleanup_srcu_struct(&priv->pfault_srcu); +clean_dev: +#endif + devlink_free(devlink); + + return err; +} + +static void remove_one(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); + struct mlx5_priv *priv = &dev->priv; + + devlink_unregister(devlink); + mlx5_unregister_device(dev); + + if (mlx5_unload_one(dev, priv, true)) { + dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); + mlx5_health_cleanup(dev); + return; + } + + mlx5_pagealloc_cleanup(dev); + mlx5_health_cleanup(dev); + mlx5_pci_close(dev, priv); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + cleanup_srcu_struct(&priv->pfault_srcu); +#endif + devlink_free(devlink); +} + +static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; + + dev_info(&pdev->dev, "%s was called\n", __func__); + + mlx5_enter_error_state(dev, false); + mlx5_unload_one(dev, priv, false); + /* In case of kernel call drain the health wq */ + if (state) { + mlx5_drain_health_wq(dev); + mlx5_pci_disable_device(dev); + } + + return state == pci_channel_io_perm_failure ? + PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; +} + +/* wait for the device to show vital signs by waiting + * for the health counter to start counting. + */ +static int wait_vital(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_health *health = &dev->priv.health; + const int niter = 100; + u32 last_count = 0; + u32 count; + int i; + + for (i = 0; i < niter; i++) { + count = ioread32be(health->health_counter); + if (count && count != 0xffffffff) { + if (last_count && last_count != count) { + dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i); + return 0; + } + last_count = count; + } + msleep(50); + } + + return -ETIMEDOUT; +} + +static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + dev_info(&pdev->dev, "%s was called\n", __func__); + + err = mlx5_pci_enable_device(dev); + if (err) { + dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n" + , __func__, err); + return PCI_ERS_RESULT_DISCONNECT; + } + + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + + if (wait_vital(pdev)) { + dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); + return PCI_ERS_RESULT_DISCONNECT; + } + + return PCI_ERS_RESULT_RECOVERED; +} + +static void mlx5_pci_resume(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; + int err; + + dev_info(&pdev->dev, "%s was called\n", __func__); + + err = mlx5_load_one(dev, priv, false); + if (err) + dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" + , __func__, err); + else + dev_info(&pdev->dev, "%s: device recovered\n", __func__); +} + +static const struct pci_error_handlers mlx5_err_handler = { + .error_detected = mlx5_pci_err_detected, + .slot_reset = mlx5_pci_slot_reset, + .resume = mlx5_pci_resume +}; + +static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) +{ + int ret; + + if (!MLX5_CAP_GEN(dev, force_teardown)) { + mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n"); + return -EOPNOTSUPP; + } + + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_core_dbg(dev, "Device in internal error state, giving up\n"); + return -EAGAIN; + } + + /* Panic tear down fw command will stop the PCI bus communication + * with the HCA, so the health polll is no longer needed. + */ + mlx5_drain_health_wq(dev); + mlx5_stop_health_poll(dev, false); + + ret = mlx5_cmd_force_teardown_hca(dev); + if (ret) { + mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret); + mlx5_start_health_poll(dev); + return ret; + } + + mlx5_enter_error_state(dev, true); + + /* Some platforms requiring freeing the IRQ's in the shutdown + * flow. If they aren't freed they can't be allocated after + * kexec. There is no need to cleanup the mlx5_core software + * contexts. + */ + mlx5_irq_clear_affinity_hints(dev); + mlx5_core_eq_free_irqs(dev); + + return 0; +} + +static void shutdown(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; + int err; + + dev_info(&pdev->dev, "Shutdown was called\n"); + err = mlx5_try_fast_unload(dev); + if (err) + mlx5_unload_one(dev, priv, false); + mlx5_pci_disable_device(dev); +} + +static const struct pci_device_id mlx5_core_pci_table[] = { + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) }, + { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) }, + { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) }, + { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ + { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ + { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ + { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5 Ex */ + { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */ + { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */ + { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */ + { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ + { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ + { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); + +void mlx5_disable_device(struct mlx5_core_dev *dev) +{ + mlx5_pci_err_detected(dev->pdev, 0); +} + +void mlx5_recover_device(struct mlx5_core_dev *dev) +{ + mlx5_pci_disable_device(dev); + if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) + mlx5_pci_resume(dev->pdev); +} + +static struct pci_driver mlx5_core_driver = { + .name = DRIVER_NAME, + .id_table = mlx5_core_pci_table, + .probe = init_one, + .remove = remove_one, + .shutdown = shutdown, + .err_handler = &mlx5_err_handler, + .sriov_configure = mlx5_core_sriov_configure, +}; + +static void mlx5_core_verify_params(void) +{ + if (prof_sel >= ARRAY_SIZE(profile)) { + pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n", + prof_sel, + ARRAY_SIZE(profile) - 1, + MLX5_DEFAULT_PROF); + prof_sel = MLX5_DEFAULT_PROF; + } +} + +static int __init init(void) +{ + int err; + + get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); + + mlx5_core_verify_params(); + mlx5_fpga_ipsec_build_fs_cmds(); + mlx5_register_debugfs(); + + err = pci_register_driver(&mlx5_core_driver); + if (err) + goto err_debug; + +#ifdef CONFIG_MLX5_CORE_EN + mlx5e_init(); +#endif + + return 0; + +err_debug: + mlx5_unregister_debugfs(); + return err; +} + +static void __exit cleanup(void) +{ +#ifdef CONFIG_MLX5_CORE_EN + mlx5e_cleanup(); +#endif + pci_unregister_driver(&mlx5_core_driver); + mlx5_unregister_debugfs(); +} + +module_init(init); +module_exit(cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c new file mode 100644 index 000000000..ba2b09cc1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include <rdma/ib_verbs.h> +#include "mlx5_core.h" + +int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) +{ + u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {0}; + void *gid; + + MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG); + MLX5_SET(attach_to_mcg_in, in, qpn, qpn); + gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid); + memcpy(gid, mgid, sizeof(*mgid)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_attach_mcg); + +int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) +{ + u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {0}; + void *gid; + + MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, in, qpn, qpn); + gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid); + memcpy(gid, mgid, sizeof(*mgid)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_detach_mcg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h new file mode 100644 index 000000000..b4134fa0b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_CORE_H__ +#define __MLX5_CORE_H__ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/if_link.h> +#include <linux/firmware.h> +#include <linux/mlx5/cq.h> + +#define DRIVER_NAME "mlx5_core" +#define DRIVER_VERSION "5.0-0" + +extern uint mlx5_core_debug_mask; + +#define mlx5_core_dbg(__dev, format, ...) \ + dev_dbg(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_dbg_once(__dev, format, ...) \ + dev_dbg_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_dbg_mask(__dev, mask, format, ...) \ +do { \ + if ((mask) & mlx5_core_debug_mask) \ + mlx5_core_dbg(__dev, format, ##__VA_ARGS__); \ +} while (0) + +#define mlx5_core_err(__dev, format, ...) \ + dev_err(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_err_rl(__dev, format, ...) \ + dev_err_ratelimited(&(__dev)->pdev->dev, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_warn(__dev, format, ...) \ + dev_warn(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_info(__dev, format, ...) \ + dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) + +enum { + MLX5_CMD_DATA, /* print command payload only */ + MLX5_CMD_TIME, /* print command execution time */ +}; + +enum { + MLX5_DRIVER_STATUS_ABORTED = 0xfe, + MLX5_DRIVER_SYND = 0xbadd00de, +}; + +int mlx5_query_hca_caps(struct mlx5_core_dev *dev); +int mlx5_query_board_id(struct mlx5_core_dev *dev); +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); +int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); +void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + unsigned long param); +void mlx5_core_page_fault(struct mlx5_core_dev *dev, + struct mlx5_pagefault *pfault); +void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); +void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); +void mlx5_disable_device(struct mlx5_core_dev *dev); +void mlx5_recover_device(struct mlx5_core_dev *dev); +int mlx5_sriov_init(struct mlx5_core_dev *dev); +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); +int mlx5_sriov_attach(struct mlx5_core_dev *dev); +void mlx5_sriov_detach(struct mlx5_core_dev *dev); +int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); +bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev); +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *context, u32 *element_id); +int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *context, u32 element_id, + u32 modify_bitmask); +int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + u32 element_id); +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); +u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); + +int mlx5_eq_init(struct mlx5_core_dev *dev); +void mlx5_eq_cleanup(struct mlx5_core_dev *dev); +int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, + int nent, u64 mask, const char *name, + enum mlx5_eq_type type); +int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + u32 *out, int outlen); +int mlx5_start_eqs(struct mlx5_core_dev *dev); +void mlx5_stop_eqs(struct mlx5_core_dev *dev); +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); +struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); +void mlx5_cq_tasklet_cb(unsigned long data); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); + +int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, + u8 access_reg_group); +int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group, + u8 access_reg_group); +int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, + u8 feature_group, u8 access_reg_group); + +void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); +void mlx5_lag_remove(struct mlx5_core_dev *dev); + +void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv); +void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); +void mlx5_attach_device(struct mlx5_core_dev *dev); +void mlx5_detach_device(struct mlx5_core_dev *dev); +bool mlx5_device_registered(struct mlx5_core_dev *dev); +int mlx5_register_device(struct mlx5_core_dev *dev); +void mlx5_unregister_device(struct mlx5_core_dev *dev); +void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); +void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); +struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); +void mlx5_dev_list_lock(void); +void mlx5_dev_list_unlock(void); +int mlx5_dev_list_trylock(void); +int mlx5_encap_alloc(struct mlx5_core_dev *dev, + int header_type, + size_t size, + void *encap_header, + u32 *encap_id); +void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id); + +int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 namespace, u8 num_actions, + void *modify_actions, u32 *modify_header_id); +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id); + +bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); + +int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size); +int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); +int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); +int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); + +#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \ + MLX5_CAP_GEN((mdev), pps_modify) && \ + MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ + MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj)) + +int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw); + +void mlx5e_init(void); +void mlx5e_cleanup(void); + +static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) +{ + /* LACP owner conditions: + * 1) Function is physical. + * 2) LAG is supported by FW. + * 3) LAG is managed by driver (currently the only option). + */ + return MLX5_CAP_GEN(dev, vport_group_manager) && + (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && + MLX5_CAP_GEN(dev, lag_master); +} + +int mlx5_lag_allow(struct mlx5_core_dev *dev); +int mlx5_lag_forbid(struct mlx5_core_dev *dev); + +void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol); +#endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c new file mode 100644 index 000000000..0670165af --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +void mlx5_init_mkey_table(struct mlx5_core_dev *dev) +{ + struct mlx5_mkey_table *table = &dev->priv.mkey_table; + + memset(table, 0, sizeof(*table)); + rwlock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); +} + +void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) +{ +} + +int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen, + u32 *out, int outlen, + mlx5_cmd_cbk_t callback, void *context) +{ + struct mlx5_mkey_table *table = &dev->priv.mkey_table; + u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; + u32 mkey_index; + void *mkc; + int err; + u8 key; + + spin_lock_irq(&dev->priv.mkey_lock); + key = dev->priv.mkey_key++; + spin_unlock_irq(&dev->priv.mkey_lock); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + MLX5_SET(mkc, mkc, mkey_7_0, key); + + if (callback) + return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen, + callback, context); + + err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); + if (err) + return err; + + mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); + mkey->iova = MLX5_GET64(mkc, mkc, start_addr); + mkey->size = MLX5_GET64(mkc, mkc, len); + mkey->key = mlx5_idx_to_mkey(mkey_index) | key; + mkey->pd = MLX5_GET(mkc, mkc, pd); + + mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", + mkey_index, key, mkey->key); + + /* connect to mkey tree */ + write_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); + write_unlock_irq(&table->lock); + if (err) { + mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", + mlx5_base_mkey(mkey->key), err); + mlx5_core_destroy_mkey(dev, mkey); + } + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_mkey_cb); + +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen) +{ + return mlx5_core_create_mkey_cb(dev, mkey, in, inlen, + NULL, 0, NULL, NULL); +} +EXPORT_SYMBOL(mlx5_core_create_mkey); + +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey) +{ + struct mlx5_mkey_table *table = &dev->priv.mkey_table; + u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; + struct mlx5_core_mkey *deleted_mkey; + unsigned long flags; + + write_lock_irqsave(&table->lock, flags); + deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); + write_unlock_irqrestore(&table->lock, flags); + if (!deleted_mkey) { + mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n", + mlx5_base_mkey(mkey->key)); + return -ENOENT; + } + + MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_destroy_mkey); + +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0}; + + memset(out, 0, outlen); + MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY); + MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} +EXPORT_SYMBOL(mlx5_core_query_mkey); + +static inline u32 mlx5_get_psv(u32 *out, int psv_index) +{ + switch (psv_index) { + case 1: return MLX5_GET(create_psv_out, out, psv1_index); + case 2: return MLX5_GET(create_psv_out, out, psv2_index); + case 3: return MLX5_GET(create_psv_out, out, psv3_index); + default: return MLX5_GET(create_psv_out, out, psv0_index); + } +} + +int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, + int npsvs, u32 *sig_index) +{ + u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_psv_in)] = {0}; + int i, err; + + if (npsvs > MLX5_MAX_PSVS) + return -EINVAL; + + MLX5_SET(create_psv_in, in, opcode, MLX5_CMD_OP_CREATE_PSV); + MLX5_SET(create_psv_in, in, pd, pdn); + MLX5_SET(create_psv_in, in, num_psv, npsvs); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + for (i = 0; i < npsvs; i++) + sig_index[i] = mlx5_get_psv(out, i); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_psv); + +int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num) +{ + u32 out[MLX5_ST_SZ_DW(destroy_psv_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {0}; + + MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV); + MLX5_SET(destroy_psv_in, in, psvn, psv_num); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_destroy_psv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c new file mode 100644 index 000000000..9c3653e06 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -0,0 +1,596 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/highmem.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +enum { + MLX5_PAGES_CANT_GIVE = 0, + MLX5_PAGES_GIVE = 1, + MLX5_PAGES_TAKE = 2 +}; + +struct mlx5_pages_req { + struct mlx5_core_dev *dev; + u16 func_id; + s32 npages; + struct work_struct work; +}; + +struct fw_page { + struct rb_node rb_node; + u64 addr; + struct page *page; + u16 func_id; + unsigned long bitmask; + struct list_head list; + unsigned free_count; +}; + +enum { + MAX_RECLAIM_TIME_MSECS = 5000, + MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60, +}; + +enum { + MLX5_MAX_RECLAIM_TIME_MILI = 5000, + MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE, +}; + +static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id) +{ + struct rb_root *root = &dev->priv.page_root; + struct rb_node **new = &root->rb_node; + struct rb_node *parent = NULL; + struct fw_page *nfp; + struct fw_page *tfp; + int i; + + while (*new) { + parent = *new; + tfp = rb_entry(parent, struct fw_page, rb_node); + if (tfp->addr < addr) + new = &parent->rb_left; + else if (tfp->addr > addr) + new = &parent->rb_right; + else + return -EEXIST; + } + + nfp = kzalloc(sizeof(*nfp), GFP_KERNEL); + if (!nfp) + return -ENOMEM; + + nfp->addr = addr; + nfp->page = page; + nfp->func_id = func_id; + nfp->free_count = MLX5_NUM_4K_IN_PAGE; + for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++) + set_bit(i, &nfp->bitmask); + + rb_link_node(&nfp->rb_node, parent, new); + rb_insert_color(&nfp->rb_node, root); + list_add(&nfp->list, &dev->priv.free_list); + + return 0; +} + +static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr) +{ + struct rb_root *root = &dev->priv.page_root; + struct rb_node *tmp = root->rb_node; + struct fw_page *result = NULL; + struct fw_page *tfp; + + while (tmp) { + tfp = rb_entry(tmp, struct fw_page, rb_node); + if (tfp->addr < addr) { + tmp = tmp->rb_left; + } else if (tfp->addr > addr) { + tmp = tmp->rb_right; + } else { + result = tfp; + break; + } + } + + return result; +} + +static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, + s32 *npages, int boot) +{ + u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {0}; + int err; + + MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES); + MLX5_SET(query_pages_in, in, op_mod, boot ? + MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES : + MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *npages = MLX5_GET(query_pages_out, out, num_pages); + *func_id = MLX5_GET(query_pages_out, out, function_id); + + return err; +} + +static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr) +{ + struct fw_page *fp; + unsigned n; + + if (list_empty(&dev->priv.free_list)) + return -ENOMEM; + + fp = list_entry(dev->priv.free_list.next, struct fw_page, list); + n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask)); + if (n >= MLX5_NUM_4K_IN_PAGE) { + mlx5_core_warn(dev, "alloc 4k bug\n"); + return -ENOENT; + } + clear_bit(n, &fp->bitmask); + fp->free_count--; + if (!fp->free_count) + list_del(&fp->list); + + *addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE; + + return 0; +} + +#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT) + +static void free_4k(struct mlx5_core_dev *dev, u64 addr) +{ + struct fw_page *fwp; + int n; + + fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK); + if (!fwp) { + mlx5_core_warn(dev, "page not found\n"); + return; + } + + n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT; + fwp->free_count++; + set_bit(n, &fwp->bitmask); + if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) { + rb_erase(&fwp->rb_node, &dev->priv.page_root); + if (fwp->free_count != 1) + list_del(&fwp->list); + dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(fwp->page); + kfree(fwp); + } else if (fwp->free_count == 1) { + list_add(&fwp->list, &dev->priv.free_list); + } +} + +static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) +{ + struct page *page; + u64 zero_addr = 1; + u64 addr; + int err; + int nid = dev_to_node(&dev->pdev->dev); + + page = alloc_pages_node(nid, GFP_HIGHUSER, 0); + if (!page) { + mlx5_core_warn(dev, "failed to allocate page\n"); + return -ENOMEM; + } +map: + addr = dma_map_page(&dev->pdev->dev, page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(&dev->pdev->dev, addr)) { + mlx5_core_warn(dev, "failed dma mapping page\n"); + err = -ENOMEM; + goto err_mapping; + } + + /* Firmware doesn't support page with physical address 0 */ + if (addr == 0) { + zero_addr = addr; + goto map; + } + + err = insert_page(dev, addr, page, func_id); + if (err) { + mlx5_core_err(dev, "failed to track allocated page\n"); + dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); + } + +err_mapping: + if (err) + __free_page(page); + + if (zero_addr == 0) + dma_unmap_page(&dev->pdev->dev, zero_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + return err; +} + +static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; + int err; + + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n", + func_id, err); +} + +static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, + int notify_fail) +{ + u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; + int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); + u64 addr; + int err; + u32 *in; + int i; + + inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + mlx5_core_warn(dev, "vzalloc failed %d\n", inlen); + goto out_free; + } + + for (i = 0; i < npages; i++) { +retry: + err = alloc_4k(dev, &addr); + if (err) { + if (err == -ENOMEM) + err = alloc_system_page(dev, func_id); + if (err) + goto out_4k; + + goto retry; + } + MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr); + } + + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, input_num_entries, npages); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (err) { + mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", + func_id, npages, err); + goto out_4k; + } + + dev->priv.fw_pages += npages; + if (func_id) + dev->priv.vfs_pages += npages; + + mlx5_core_dbg(dev, "err %d\n", err); + + kvfree(in); + return 0; + +out_4k: + for (i--; i >= 0; i--) + free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i])); +out_free: + kvfree(in); + if (notify_fail) + page_notify_fail(dev, func_id); + return err; +} + +static u32 fwp_fill_manage_pages_out(struct fw_page *fwp, u32 *out, u32 index, + u32 npages) +{ + u32 pages_set = 0; + unsigned int n; + + for_each_clear_bit(n, &fwp->bitmask, MLX5_NUM_4K_IN_PAGE) { + MLX5_ARRAY_SET64(manage_pages_out, out, pas, index + pages_set, + fwp->addr + (n * MLX5_ADAPTER_PAGE_SIZE)); + pages_set++; + + if (!--npages) + break; + } + + return pages_set; +} + +static int reclaim_pages_cmd(struct mlx5_core_dev *dev, + u32 *in, int in_size, u32 *out, int out_size) +{ + struct fw_page *fwp; + struct rb_node *p; + u32 func_id; + u32 npages; + u32 i = 0; + + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + return mlx5_cmd_exec(dev, in, in_size, out, out_size); + + /* No hard feelings, we want our pages back! */ + npages = MLX5_GET(manage_pages_in, in, input_num_entries); + func_id = MLX5_GET(manage_pages_in, in, function_id); + + p = rb_first(&dev->priv.page_root); + while (p && i < npages) { + fwp = rb_entry(p, struct fw_page, rb_node); + p = rb_next(p); + if (fwp->func_id != func_id) + continue; + + i += fwp_fill_manage_pages_out(fwp, out, i, npages - i); + } + + MLX5_SET(manage_pages_out, out, output_num_entries, i); + return 0; +} + +static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, + int *nclaimed) +{ + int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); + u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; + int num_claimed; + u32 *out; + int err; + int i; + + if (nclaimed) + *nclaimed = 0; + + outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, input_num_entries, npages); + + mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); + err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); + if (err) { + mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); + goto out_free; + } + + num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries); + if (num_claimed > npages) { + mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n", + num_claimed, npages); + err = -EINVAL; + goto out_free; + } + + for (i = 0; i < num_claimed; i++) + free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i])); + + if (nclaimed) + *nclaimed = num_claimed; + + dev->priv.fw_pages -= num_claimed; + if (func_id) + dev->priv.vfs_pages -= num_claimed; + +out_free: + kvfree(out); + return err; +} + +static void pages_work_handler(struct work_struct *work) +{ + struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work); + struct mlx5_core_dev *dev = req->dev; + int err = 0; + + if (req->npages < 0) + err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL); + else if (req->npages > 0) + err = give_pages(dev, req->func_id, req->npages, 1); + + if (err) + mlx5_core_warn(dev, "%s fail %d\n", + req->npages < 0 ? "reclaim" : "give", err); + + kfree(req); +} + +void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, + s32 npages) +{ + struct mlx5_pages_req *req; + + req = kzalloc(sizeof(*req), GFP_ATOMIC); + if (!req) { + mlx5_core_warn(dev, "failed to allocate pages request\n"); + return; + } + + req->dev = dev; + req->func_id = func_id; + req->npages = npages; + INIT_WORK(&req->work, pages_work_handler); + queue_work(dev->priv.pg_wq, &req->work); +} + +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) +{ + u16 uninitialized_var(func_id); + s32 uninitialized_var(npages); + int err; + + err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot); + if (err) + return err; + + mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", + npages, boot ? "boot" : "init", func_id); + + return give_pages(dev, func_id, npages, 0); +} + +enum { + MLX5_BLKS_FOR_RECLAIM_PAGES = 12 +}; + +static int optimal_reclaimed_pages(void) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_layout *lay; + int ret; + + ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) - + MLX5_ST_SZ_BYTES(manage_pages_out)) / + MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); + + return ret; +} + +int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) +{ + unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); + struct fw_page *fwp; + struct rb_node *p; + int nclaimed = 0; + int err = 0; + + do { + p = rb_first(&dev->priv.page_root); + if (p) { + fwp = rb_entry(p, struct fw_page, rb_node); + err = reclaim_pages(dev, fwp->func_id, + optimal_reclaimed_pages(), + &nclaimed); + + if (err) { + mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", + err); + return err; + } + if (nclaimed) + end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); + } + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "FW did not return all pages. giving up...\n"); + break; + } + } while (p); + + WARN(dev->priv.fw_pages, + "FW pages counter is %d after reclaiming all pages\n", + dev->priv.fw_pages); + WARN(dev->priv.vfs_pages, + "VFs FW pages counter is %d after reclaiming all pages\n", + dev->priv.vfs_pages); + + return 0; +} + +void mlx5_pagealloc_init(struct mlx5_core_dev *dev) +{ + dev->priv.page_root = RB_ROOT; + INIT_LIST_HEAD(&dev->priv.free_list); +} + +void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) +{ + /* nothing */ +} + +int mlx5_pagealloc_start(struct mlx5_core_dev *dev) +{ + dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); + if (!dev->priv.pg_wq) + return -ENOMEM; + + return 0; +} + +void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) +{ + destroy_workqueue(dev->priv.pg_wq); +} + +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +{ + unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + int prev_vfs_pages = dev->priv.vfs_pages; + + /* In case of internal error we will free the pages manually later */ + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_core_warn(dev, "Skipping wait for vf pages stage"); + return 0; + } + + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + dev->priv.name); + while (dev->priv.vfs_pages) { + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + return -ETIMEDOUT; + } + if (dev->priv.vfs_pages < prev_vfs_pages) { + end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + prev_vfs_pages = dev->priv.vfs_pages; + } + msleep(50); + } + + mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c new file mode 100644 index 000000000..bd830d8d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn) +{ + u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {0}; + int err; + + MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *pdn = MLX5_GET(alloc_pd_out, out, pd); + return err; +} +EXPORT_SYMBOL(mlx5_core_alloc_pd); + +int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {0}; + + MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, in, pd, pdn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_dealloc_pd); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c new file mode 100644 index 000000000..09b6b1bfb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -0,0 +1,1116 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/port.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, + int size_in, void *data_out, int size_out, + u16 reg_id, int arg, int write) +{ + int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out; + int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in; + int err = -ENOMEM; + u32 *out = NULL; + u32 *in = NULL; + void *data; + + in = kvzalloc(inlen, GFP_KERNEL); + out = kvzalloc(outlen, GFP_KERNEL); + if (!in || !out) + goto out; + + data = MLX5_ADDR_OF(access_register_in, in, register_data); + memcpy(data, data_in, size_in); + + MLX5_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG); + MLX5_SET(access_register_in, in, op_mod, !write); + MLX5_SET(access_register_in, in, argument, arg); + MLX5_SET(access_register_in, in, register_id, reg_id); + + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + if (err) + goto out; + + data = MLX5_ADDR_OF(access_register_out, out, register_data); + memcpy(data_out, data, size_out); + +out: + kvfree(out); + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_access_reg); + +int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, + u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(pcam_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(pcam_reg); + + MLX5_SET(pcam_reg, in, feature_group, feature_group); + MLX5_SET(pcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(dev, in, sz, pcam, sz, MLX5_REG_PCAM, 0, 0); +} + +int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcam, u8 feature_group, + u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(mcam_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(mcam_reg); + + MLX5_SET(mcam_reg, in, feature_group, feature_group); + MLX5_SET(mcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(dev, in, sz, mcam, sz, MLX5_REG_MCAM, 0, 0); +} + +int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, + u8 feature_group, u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(qcam_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(qcam_reg); + + MLX5_SET(qcam_reg, in, feature_group, feature_group); + MLX5_SET(qcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(mdev, in, sz, qcam, sz, MLX5_REG_QCAM, 0, 0); +} + +struct mlx5_reg_pcap { + u8 rsvd0; + u8 port_num; + u8 rsvd1[2]; + __be32 caps_127_96; + __be32 caps_95_64; + __be32 caps_63_32; + __be32 caps_31_0; +}; + +int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps) +{ + struct mlx5_reg_pcap in; + struct mlx5_reg_pcap out; + + memset(&in, 0, sizeof(in)); + in.caps_127_96 = cpu_to_be32(caps); + in.port_num = port_num; + + return mlx5_core_access_reg(dev, &in, sizeof(in), &out, + sizeof(out), MLX5_REG_PCAP, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_caps); + +int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, + int ptys_size, int proto_mask, u8 local_port) +{ + u32 in[MLX5_ST_SZ_DW(ptys_reg)] = {0}; + + MLX5_SET(ptys_reg, in, local_port, local_port); + MLX5_SET(ptys_reg, in, proto_mask, proto_mask); + return mlx5_core_access_reg(dev, in, sizeof(in), ptys, + ptys_size, MLX5_REG_PTYS, 0, 0); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_ptys); + +int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) +{ + u32 in[MLX5_ST_SZ_DW(mlcr_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(mlcr_reg)]; + + MLX5_SET(mlcr_reg, in, local_port, 1); + MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration); + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MLCR, 0, 1); +} + +int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, + u32 *proto_cap, int proto_mask) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); + if (err) + return err; + + if (proto_mask == MLX5_PTYS_EN) + *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + else + *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap); + +int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, + u32 *proto_admin, int proto_mask) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); + if (err) + return err; + + if (proto_mask == MLX5_PTYS_EN) + *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + else + *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin); + +int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, + u8 *link_width_oper, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port); + if (err) + return err; + + *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); + +int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, + u32 *proto_oper, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, + local_port); + if (err) + return err; + + *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + + return 0; +} +EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper); + +int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, + local_port); + if (err) + return err; + + *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + + return 0; +} +EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper); + +int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, int proto_mask) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u8 an_disable_admin; + u8 an_disable_cap; + u8 an_status; + + mlx5_query_port_autoneg(dev, proto_mask, &an_status, + &an_disable_cap, &an_disable_admin); + if (!an_disable_cap && an_disable) + return -EPERM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); + MLX5_SET(ptys_reg, in, proto_mask, proto_mask); + if (proto_mask == MLX5_PTYS_EN) + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + else + MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PTYS, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_ptys); + +/* This function should be used after setting a port register only */ +void mlx5_toggle_port_link(struct mlx5_core_dev *dev) +{ + enum mlx5_port_status ps; + + mlx5_query_port_admin_status(dev, &ps); + mlx5_set_port_admin_status(dev, MLX5_PORT_DOWN); + if (ps == MLX5_PORT_UP) + mlx5_set_port_admin_status(dev, MLX5_PORT_UP); +} +EXPORT_SYMBOL_GPL(mlx5_toggle_port_link); + +int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status) +{ + u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(paos_reg)]; + + MLX5_SET(paos_reg, in, local_port, 1); + MLX5_SET(paos_reg, in, admin_status, status); + MLX5_SET(paos_reg, in, ase, 1); + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PAOS, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status); + +int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status *status) +{ + u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(paos_reg)]; + int err; + + MLX5_SET(paos_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PAOS, 0, 0); + if (err) + return err; + *status = MLX5_GET(paos_reg, out, admin_status); + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status); + +static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu, + u16 *max_mtu, u16 *oper_mtu, u8 port) +{ + u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; + + MLX5_SET(pmtu_reg, in, local_port, port); + mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 0); + + if (max_mtu) + *max_mtu = MLX5_GET(pmtu_reg, out, max_mtu); + if (oper_mtu) + *oper_mtu = MLX5_GET(pmtu_reg, out, oper_mtu); + if (admin_mtu) + *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu); +} + +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port) +{ + u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; + + MLX5_SET(pmtu_reg, in, admin_mtu, mtu); + MLX5_SET(pmtu_reg, in, local_port, port); + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); + +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, + u8 port) +{ + mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu); + +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, + u8 port) +{ + mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); + +static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) +{ + u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pmlp_reg)]; + int module_mapping; + int err; + + MLX5_SET(pmlp_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMLP, 0, 0); + if (err) + return err; + + module_mapping = MLX5_GET(pmlp_reg, out, lane0_module_mapping); + *module_num = module_mapping & MLX5_EEPROM_IDENTIFIER_BYTE_MASK; + + return 0; +} + +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data) +{ + u32 out[MLX5_ST_SZ_DW(mcia_reg)]; + u32 in[MLX5_ST_SZ_DW(mcia_reg)]; + int module_num; + u16 i2c_addr; + int status; + int err; + void *ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + + err = mlx5_query_module_num(dev, &module_num); + if (err) + return err; + + memset(in, 0, sizeof(in)); + size = min_t(int, size, MLX5_EEPROM_MAX_BYTES); + + if (offset < MLX5_EEPROM_PAGE_LENGTH && + offset + size > MLX5_EEPROM_PAGE_LENGTH) + /* Cross pages read, read until offset 256 in low page */ + size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; + + i2c_addr = MLX5_I2C_ADDR_LOW; + + MLX5_SET(mcia_reg, in, l, 0); + MLX5_SET(mcia_reg, in, module, module_num); + MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr); + MLX5_SET(mcia_reg, in, page_number, 0); + MLX5_SET(mcia_reg, in, device_address, offset); + MLX5_SET(mcia_reg, in, size, size); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCIA, 0, 0); + if (err) + return err; + + status = MLX5_GET(mcia_reg, out, status); + if (status) { + mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", + status); + return -EIO; + } + + memcpy(data, ptr, size); + + return size; +} +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); + +static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, + int pvlc_size, u8 local_port) +{ + u32 in[MLX5_ST_SZ_DW(pvlc_reg)] = {0}; + + MLX5_SET(pvlc_reg, in, local_port, local_port); + return mlx5_core_access_reg(dev, in, sizeof(in), pvlc, + pvlc_size, MLX5_REG_PVLC, 0, 0); +} + +int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, + u8 *vl_hw_cap, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(pvlc_reg)]; + int err; + + err = mlx5_query_port_pvlc(dev, out, sizeof(out), local_port); + if (err) + return err; + + *vl_hw_cap = MLX5_GET(pvlc_reg, out, vl_hw_cap); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); + +int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, + u8 port_num, void *out, size_t sz) +{ + u32 *in; + int err; + + in = kvzalloc(sz, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(ppcnt_reg, in, local_port, port_num); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP); + err = mlx5_core_access_reg(dev, in, sz, out, + sz, MLX5_REG_PPCNT, 0, 0); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt); + +static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out, + u32 out_size) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + + MLX5_SET(pfcc_reg, in, local_port, 1); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + out_size, MLX5_REG_PFCC, 0, 0); +} + +int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pptx, tx_pause); + MLX5_SET(pfcc_reg, in, pprx, rx_pause); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_pause); + +int mlx5_query_port_pause(struct mlx5_core_dev *dev, + u32 *rx_pause, u32 *tx_pause) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + err = mlx5_query_pfcc_reg(dev, out, sizeof(out)); + if (err) + return err; + + if (rx_pause) + *rx_pause = MLX5_GET(pfcc_reg, out, pprx); + + if (tx_pause) + *tx_pause = MLX5_GET(pfcc_reg, out, pptx); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_pause); + +int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev, + u16 stall_critical_watermark, + u16 stall_minor_watermark) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pptx_mask_n, 1); + MLX5_SET(pfcc_reg, in, pprx_mask_n, 1); + MLX5_SET(pfcc_reg, in, ppan_mask_n, 1); + MLX5_SET(pfcc_reg, in, critical_stall_mask, 1); + MLX5_SET(pfcc_reg, in, minor_stall_mask, 1); + MLX5_SET(pfcc_reg, in, device_stall_critical_watermark, + stall_critical_watermark); + MLX5_SET(pfcc_reg, in, device_stall_minor_watermark, stall_minor_watermark); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} + +int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev, + u16 *stall_critical_watermark, + u16 *stall_minor_watermark) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + err = mlx5_query_pfcc_reg(dev, out, sizeof(out)); + if (err) + return err; + + if (stall_critical_watermark) + *stall_critical_watermark = MLX5_GET(pfcc_reg, out, + device_stall_critical_watermark); + + if (stall_minor_watermark) + *stall_minor_watermark = MLX5_GET(pfcc_reg, out, + device_stall_minor_watermark); + + return 0; +} + +int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx); + MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx); + MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_tx); + MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_rx); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_pfc); + +int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + err = mlx5_query_pfcc_reg(dev, out, sizeof(out)); + if (err) + return err; + + if (pfc_en_tx) + *pfc_en_tx = MLX5_GET(pfcc_reg, out, pfctx); + + if (pfc_en_rx) + *pfc_en_rx = MLX5_GET(pfcc_reg, out, pfcrx); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); + +void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, + u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + *an_status = 0; + *an_disable_cap = 0; + *an_disable_admin = 0; + + if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1)) + return; + + *an_status = MLX5_GET(ptys_reg, out, an_status); + *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); + *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg); + +int mlx5_max_tc(struct mlx5_core_dev *mdev) +{ + u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8; + + return num_tc - 1; +} + +int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(dcbx_param)] = {0}; + + MLX5_SET(dcbx_param, in, port_number, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(in), MLX5_REG_DCBX_PARAM, 0, 0); +} + +int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in) +{ + u32 out[MLX5_ST_SZ_DW(dcbx_param)]; + + MLX5_SET(dcbx_param, in, port_number, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(out), out, + sizeof(out), MLX5_REG_DCBX_PARAM, 0, 1); +} + +int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc) +{ + u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(qtct_reg)]; + int err; + int i; + + for (i = 0; i < 8; i++) { + if (prio_tc[i] > mlx5_max_tc(mdev)) + return -EINVAL; + + MLX5_SET(qtct_reg, in, prio, i); + MLX5_SET(qtct_reg, in, tclass, prio_tc[i]); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QTCT, 0, 1); + if (err) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc); + +int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, + u8 prio, u8 *tc) +{ + u32 in[MLX5_ST_SZ_DW(qtct_reg)]; + u32 out[MLX5_ST_SZ_DW(qtct_reg)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(qtct_reg, in, port_number, 1); + MLX5_SET(qtct_reg, in, prio, prio); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QTCT, 0, 0); + if (!err) + *tc = MLX5_GET(qtct_reg, out, tclass); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc); + +static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -EOPNOTSUPP; + + return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out), + MLX5_REG_QETCR, 0, 1); +} + +static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -EOPNOTSUPP; + + memset(in, 0, sizeof(in)); + return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen, + MLX5_REG_QETCR, 0, 0); +} + +int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; + int i; + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1); + MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group); + +int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, + u8 tc, u8 *tc_group) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, + tc_configuration[tc]); + + *tc_group = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + group); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_tc_group); + +int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; + int i; + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1); + MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc); + +int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, + u8 tc, u8 *bw_pct) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, + tc_configuration[tc]); + + *bw_pct = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + bw_allocation); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_tc_bw_alloc); + +int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_units) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; + void *ets_tcn_conf; + int i; + + MLX5_SET(qetc_reg, in, port_number, 1); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, in, tc_configuration[i]); + + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, r, 1); + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_units, + max_bw_units[i]); + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_value, + max_bw_value[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_modify_port_ets_rate_limit); + +int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_units) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + int i; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, tc_configuration[i]); + + max_bw_value[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + max_bw_value); + max_bw_units[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + max_bw_units); + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit); + +int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode) +{ + u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)] = {0}; + + MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL); + MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1); + MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode); + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_wol); + +int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) +{ + u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {0}; + int err; + + MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL); + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + *wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_wol); + +static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + + MLX5_SET(pcmr_reg, in, local_port, 1); + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + outlen, MLX5_REG_PCMR, 0, 0); +} + +static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + + return mlx5_core_access_reg(mdev, in, inlen, out, + sizeof(out), MLX5_REG_PCMR, 0, 1); +} + +int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, fcs_chk, enable); + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, + bool *enabled) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + /* Default values for FW which do not support MLX5_REG_PCMR */ + *supported = false; + *enabled = true; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return; + + if (mlx5_query_ports_check(mdev, out, sizeof(out))) + return; + + *supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap)); + *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk)); +} + +static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = { + "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */ + "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */ + "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */ +}; + +static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = { + "Power budget exceeded", + "Long Range for non MLNX cable", + "Bus stuck(I2C or data shorted)", + "No EEPROM/retry timeout", + "Enforce part number list", + "Unknown identifier", + "High Temperature", + "Bad or shorted cable/module", + "Unknown status", +}; + +void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) +{ + enum port_module_event_status_type module_status; + enum port_module_event_error_type error_type; + struct mlx5_eqe_port_module *module_event_eqe; + struct mlx5_priv *priv = &dev->priv; + u8 module_num; + + module_event_eqe = &eqe->data.port_module; + module_num = module_event_eqe->module; + module_status = module_event_eqe->module_status & + PORT_MODULE_EVENT_MODULE_STATUS_MASK; + error_type = module_event_eqe->error_type & + PORT_MODULE_EVENT_ERROR_TYPE_MASK; + + if (module_status < MLX5_MODULE_STATUS_ERROR) { + priv->pme_stats.status_counters[module_status - 1]++; + } else if (module_status == MLX5_MODULE_STATUS_ERROR) { + if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN) + /* Unknown error type */ + error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN; + priv->pme_stats.error_counters[error_type]++; + } + + if (!printk_ratelimit()) + return; + + if (module_status < MLX5_MODULE_STATUS_ERROR) + mlx5_core_info(dev, + "Port module event: module %u, %s\n", + module_num, mlx5_pme_status[module_status - 1]); + + else if (module_status == MLX5_MODULE_STATUS_ERROR) + mlx5_core_info(dev, + "Port module event[error]: module %u, %s, %s\n", + module_num, mlx5_pme_status[module_status - 1], + mlx5_pme_error[error_type]); +} + +int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) +{ + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps, + mtpps_size, MLX5_REG_MTPPS, 0, 0); +} + +int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) +{ + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + return mlx5_core_access_reg(mdev, mtpps, mtpps_size, out, + sizeof(out), MLX5_REG_MTPPS, 0, 1); +} + +int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode) +{ + u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + int err = 0; + + MLX5_SET(mtppse_reg, in, pin, pin); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MTPPSE, 0, 0); + if (err) + return err; + + *arm = MLX5_GET(mtppse_reg, in, event_arm); + *mode = MLX5_GET(mtppse_reg, in, event_generation_mode); + + return err; +} + +int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode) +{ + u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + + MLX5_SET(mtppse_reg, in, pin, pin); + MLX5_SET(mtppse_reg, in, event_arm, arm); + MLX5_SET(mtppse_reg, in, event_generation_mode, mode); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MTPPSE, 0, 1); +} + +int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state) +{ + u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {}; + int err; + + MLX5_SET(qpts_reg, in, local_port, 1); + MLX5_SET(qpts_reg, in, trust_state, trust_state); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QPTS, 0, 1); + return err; +} + +int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state) +{ + u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {}; + int err; + + MLX5_SET(qpts_reg, in, local_port, 1); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QPTS, 0, 0); + if (!err) + *trust_state = MLX5_GET(qpts_reg, out, trust_state); + + return err; +} + +int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio) +{ + int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); + void *qpdpm_dscp; + void *out; + void *in; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpdpm_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0); + if (err) + goto out; + + memcpy(in, out, sz); + MLX5_SET(qpdpm_reg, in, local_port, 1); + + /* Update the corresponding dscp entry */ + qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, in, dscp[dscp]); + MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, prio, prio); + MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, e, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 1); + +out: + kfree(in); + kfree(out); + return err; +} + +/* dscp2prio[i]: priority that dscp i mapped to */ +#define MLX5E_SUPPORTED_DSCP 64 +int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio) +{ + int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); + void *qpdpm_dscp; + void *out; + void *in; + int err; + int i; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpdpm_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0); + if (err) + goto out; + + for (i = 0; i < (MLX5E_SUPPORTED_DSCP); i++) { + qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, out, dscp[i]); + dscp2prio[i] = MLX5_GET16(qpdpm_dscp_reg, qpdpm_dscp, prio); + } + +out: + kfree(in); + kfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c new file mode 100644 index 000000000..479ac21cd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -0,0 +1,636 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/gfp.h> +#include <linux/export.h> +#include <linux/mlx5/cmd.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/transobj.h> + +#include "mlx5_core.h" + +static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev, + u32 rsn) +{ + struct mlx5_qp_table *table = &dev->priv.qp_table; + struct mlx5_core_rsc_common *common; + unsigned long flags; + + spin_lock_irqsave(&table->lock, flags); + + common = radix_tree_lookup(&table->tree, rsn); + if (common) + atomic_inc(&common->refcount); + + spin_unlock_irqrestore(&table->lock, flags); + + if (!common) { + mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", + rsn); + return NULL; + } + return common; +} + +void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common) +{ + if (atomic_dec_and_test(&common->refcount)) + complete(&common->free); +} + +static u64 qp_allowed_event_types(void) +{ + u64 mask; + + mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) | + BIT(MLX5_EVENT_TYPE_COMM_EST) | + BIT(MLX5_EVENT_TYPE_SQ_DRAINED) | + BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) | + BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | + BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) | + BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | + BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR); + + return mask; +} + +static u64 rq_allowed_event_types(void) +{ + u64 mask; + + mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) | + BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR); + + return mask; +} + +static u64 sq_allowed_event_types(void) +{ + return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR); +} + +static u64 dct_allowed_event_types(void) +{ + return BIT(MLX5_EVENT_TYPE_DCT_DRAINED); +} + +static bool is_event_type_allowed(int rsc_type, int event_type) +{ + switch (rsc_type) { + case MLX5_EVENT_QUEUE_TYPE_QP: + return BIT(event_type) & qp_allowed_event_types(); + case MLX5_EVENT_QUEUE_TYPE_RQ: + return BIT(event_type) & rq_allowed_event_types(); + case MLX5_EVENT_QUEUE_TYPE_SQ: + return BIT(event_type) & sq_allowed_event_types(); + case MLX5_EVENT_QUEUE_TYPE_DCT: + return BIT(event_type) & dct_allowed_event_types(); + default: + WARN(1, "Event arrived for unknown resource type"); + return false; + } +} + +void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) +{ + struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn); + struct mlx5_core_dct *dct; + struct mlx5_core_qp *qp; + + if (!common) + return; + + if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) { + mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n", + event_type, rsn); + goto out; + } + + switch (common->res) { + case MLX5_RES_QP: + case MLX5_RES_RQ: + case MLX5_RES_SQ: + qp = (struct mlx5_core_qp *)common; + qp->event(qp, event_type); + break; + case MLX5_RES_DCT: + dct = (struct mlx5_core_dct *)common; + if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED) + complete(&dct->drained); + break; + default: + mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn); + } +out: + mlx5_core_put_rsc(common); +} + +static int create_resource_common(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp, + int rsc_type) +{ + struct mlx5_qp_table *table = &dev->priv.qp_table; + int err; + + qp->common.res = rsc_type; + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, + qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN), + qp); + spin_unlock_irq(&table->lock); + if (err) + return err; + + atomic_set(&qp->common.refcount, 1); + init_completion(&qp->common.free); + qp->pid = current->pid; + + return 0; +} + +static void destroy_resource_common(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp) +{ + struct mlx5_qp_table *table = &dev->priv.qp_table; + unsigned long flags; + + spin_lock_irqsave(&table->lock, flags); + radix_tree_delete(&table->tree, + qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN)); + spin_unlock_irqrestore(&table->lock, flags); + mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp); + wait_for_completion(&qp->common.free); +} + +int mlx5_core_create_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct, + u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0}; + u32 din[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; + u32 dout[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + int err; + + init_completion(&dct->drained); + MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT); + + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (err) { + mlx5_core_warn(dev, "create DCT failed, ret %d\n", err); + return err; + } + + qp->qpn = MLX5_GET(create_dct_out, out, dctn); + err = create_resource_common(dev, qp, MLX5_RES_DCT); + if (err) + goto err_cmd; + + return 0; +err_cmd: + MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, din, dctn, qp->qpn); + mlx5_cmd_exec(dev, (void *)&in, sizeof(din), + (void *)&out, sizeof(dout)); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_create_dct); + +int mlx5_core_create_qp(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp, + u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {0}; + u32 dout[MLX5_ST_SZ_DW(destroy_qp_out)]; + u32 din[MLX5_ST_SZ_DW(destroy_qp_in)]; + int err; + + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (err) + return err; + + qp->qpn = MLX5_GET(create_qp_out, out, qpn); + mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); + + err = create_resource_common(dev, qp, MLX5_RES_QP); + if (err) + goto err_cmd; + + err = mlx5_debug_qp_add(dev, qp); + if (err) + mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n", + qp->qpn); + + atomic_inc(&dev->num_qps); + + return 0; + +err_cmd: + memset(din, 0, sizeof(din)); + memset(dout, 0, sizeof(dout)); + MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, qpn, qp->qpn); + mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_create_qp); + +static int mlx5_core_drain_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct) +{ + u32 out[MLX5_ST_SZ_DW(drain_dct_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(drain_dct_in)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + + MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT); + MLX5_SET(drain_dct_in, in, dctn, qp->qpn); + return mlx5_cmd_exec(dev, (void *)&in, sizeof(in), + (void *)&out, sizeof(out)); +} + +int mlx5_core_destroy_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct) +{ + u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + int err; + + err = mlx5_core_drain_dct(dev, dct); + if (err) { + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + goto destroy; + } else { + mlx5_core_warn(dev, "failed drain DCT 0x%x with error 0x%x\n", qp->qpn, err); + return err; + } + } + wait_for_completion(&dct->drained); +destroy: + destroy_resource_common(dev, &dct->mqp); + MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); + err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), + (void *)&out, sizeof(out)); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct); + +int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp) +{ + u32 out[MLX5_ST_SZ_DW(destroy_qp_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {0}; + int err; + + mlx5_debug_qp_remove(dev, qp); + + destroy_resource_common(dev, qp); + + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + atomic_dec(&dev->num_qps); + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp); + +int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev, + u32 timeout_usec) +{ + u32 out[MLX5_ST_SZ_DW(set_delay_drop_params_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(set_delay_drop_params_in)] = {0}; + + MLX5_SET(set_delay_drop_params_in, in, opcode, + MLX5_CMD_OP_SET_DELAY_DROP_PARAMS); + MLX5_SET(set_delay_drop_params_in, in, delay_drop_timeout, + timeout_usec / 100); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_set_delay_drop); + +struct mbox_info { + u32 *in; + u32 *out; + int inlen; + int outlen; +}; + +static int mbox_alloc(struct mbox_info *mbox, int inlen, int outlen) +{ + mbox->inlen = inlen; + mbox->outlen = outlen; + mbox->in = kzalloc(mbox->inlen, GFP_KERNEL); + mbox->out = kzalloc(mbox->outlen, GFP_KERNEL); + if (!mbox->in || !mbox->out) { + kfree(mbox->in); + kfree(mbox->out); + return -ENOMEM; + } + + return 0; +} + +static void mbox_free(struct mbox_info *mbox) +{ + kfree(mbox->in); + kfree(mbox->out); +} + +static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, + u32 opt_param_mask, void *qpc, + struct mbox_info *mbox) +{ + mbox->out = NULL; + mbox->in = NULL; + +#define MBOX_ALLOC(mbox, typ) \ + mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out)) + +#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \ + MLX5_SET(typ##_in, in, opcode, _opcode); \ + MLX5_SET(typ##_in, in, qpn, _qpn) + +#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \ + MOD_QP_IN_SET(typ, in, _opcode, _qpn); \ + MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \ + memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc)) + + switch (opcode) { + /* 2RST & 2ERR */ + case MLX5_CMD_OP_2RST_QP: + if (MBOX_ALLOC(mbox, qp_2rst)) + return -ENOMEM; + MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn); + break; + case MLX5_CMD_OP_2ERR_QP: + if (MBOX_ALLOC(mbox, qp_2err)) + return -ENOMEM; + MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn); + break; + + /* MODIFY with QPC */ + case MLX5_CMD_OP_RST2INIT_QP: + if (MBOX_ALLOC(mbox, rst2init_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_INIT2RTR_QP: + if (MBOX_ALLOC(mbox, init2rtr_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_RTR2RTS_QP: + if (MBOX_ALLOC(mbox, rtr2rts_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_RTS2RTS_QP: + if (MBOX_ALLOC(mbox, rts2rts_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_SQERR2RTS_QP: + if (MBOX_ALLOC(mbox, sqerr2rts_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_INIT2INIT_QP: + if (MBOX_ALLOC(mbox, init2init_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + default: + mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n", + opcode, qpn); + return -EINVAL; + } + return 0; +} + +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, + u32 opt_param_mask, void *qpc, + struct mlx5_core_qp *qp) +{ + struct mbox_info mbox; + int err; + + err = modify_qp_mbox_alloc(dev, opcode, qp->qpn, + opt_param_mask, qpc, &mbox); + if (err) + return err; + + err = mlx5_cmd_exec(dev, mbox.in, mbox.inlen, mbox.out, mbox.outlen); + mbox_free(&mbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_qp_modify); + +void mlx5_init_qp_table(struct mlx5_core_dev *dev) +{ + struct mlx5_qp_table *table = &dev->priv.qp_table; + + memset(table, 0, sizeof(*table)); + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + mlx5_qp_debugfs_init(dev); +} + +void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev) +{ + mlx5_qp_debugfs_cleanup(dev); +} + +int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {0}; + + MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP); + MLX5_SET(query_qp_in, in, qpn, qp->qpn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} +EXPORT_SYMBOL_GPL(mlx5_core_qp_query); + +int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_dct_in)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + + MLX5_SET(query_dct_in, in, opcode, MLX5_CMD_OP_QUERY_DCT); + MLX5_SET(query_dct_in, in, dctn, qp->qpn); + + return mlx5_cmd_exec(dev, (void *)&in, sizeof(in), + (void *)out, outlen); +} +EXPORT_SYMBOL_GPL(mlx5_core_dct_query); + +int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) +{ + u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {0}; + int err; + + MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc); + +int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {0}; + + MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); + +int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, + struct mlx5_core_qp *rq) +{ + int err; + u32 rqn; + + err = mlx5_core_create_rq(dev, in, inlen, &rqn); + if (err) + return err; + + rq->qpn = rqn; + err = create_resource_common(dev, rq, MLX5_RES_RQ); + if (err) + goto err_destroy_rq; + + return 0; + +err_destroy_rq: + mlx5_core_destroy_rq(dev, rq->qpn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_rq_tracked); + +void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, + struct mlx5_core_qp *rq) +{ + destroy_resource_common(dev, rq); + mlx5_core_destroy_rq(dev, rq->qpn); +} +EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked); + +int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, + struct mlx5_core_qp *sq) +{ + int err; + u32 sqn; + + err = mlx5_core_create_sq(dev, in, inlen, &sqn); + if (err) + return err; + + sq->qpn = sqn; + err = create_resource_common(dev, sq, MLX5_RES_SQ); + if (err) + goto err_destroy_sq; + + return 0; + +err_destroy_sq: + mlx5_core_destroy_sq(dev, sq->qpn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_sq_tracked); + +void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, + struct mlx5_core_qp *sq) +{ + destroy_resource_common(dev, sq); + mlx5_core_destroy_sq(dev, sq->qpn); +} +EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); + +int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id) +{ + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0}; + int err; + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *counter_id = MLX5_GET(alloc_q_counter_out, out, + counter_set_id); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter); + +int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)] = {0}; + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter); + +int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, + int reset, void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {0}; + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + MLX5_SET(query_q_counter_in, in, clear, reset); + MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id); + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); +} +EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c new file mode 100644 index 000000000..bc86dffdc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +/* Scheduling element fw management */ +int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *ctx, u32 *element_id) +{ + u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0}; + void *schedc; + int err; + + schedc = MLX5_ADDR_OF(create_scheduling_element_in, in, + scheduling_context); + MLX5_SET(create_scheduling_element_in, in, opcode, + MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT); + MLX5_SET(create_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context)); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *element_id = MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + return 0; +} + +int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *ctx, u32 element_id, + u32 modify_bitmask) +{ + u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0}; + void *schedc; + + schedc = MLX5_ADDR_OF(modify_scheduling_element_in, in, + scheduling_context); + MLX5_SET(modify_scheduling_element_in, in, opcode, + MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT); + MLX5_SET(modify_scheduling_element_in, in, scheduling_element_id, + element_id); + MLX5_SET(modify_scheduling_element_in, in, modify_bitmask, + modify_bitmask); + MLX5_SET(modify_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context)); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + u32 element_id) +{ + u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0}; + + MLX5_SET(destroy_scheduling_element_in, in, opcode, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); + MLX5_SET(destroy_scheduling_element_in, in, scheduling_element_id, + element_id); + MLX5_SET(destroy_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +/* Finds an entry where we can register the given rate + * If the rate already exists, return the entry where it is registered, + * otherwise return the first available entry. + * If the table is full, return NULL + */ +static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, + struct mlx5_rate_limit *rl) +{ + struct mlx5_rl_entry *ret_entry = NULL; + bool empty_found = false; + int i; + + for (i = 0; i < table->max_size; i++) { + if (mlx5_rl_are_equal(&table->rl_entry[i].rl, rl)) + return &table->rl_entry[i]; + if (!empty_found && !table->rl_entry[i].rl.rate) { + empty_found = true; + ret_entry = &table->rl_entry[i]; + } + } + + return ret_entry; +} + +static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev, + u16 index, + struct mlx5_rate_limit *rl) +{ + u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0}; + + MLX5_SET(set_pp_rate_limit_in, in, opcode, + MLX5_CMD_OP_SET_PP_RATE_LIMIT); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rl->rate); + MLX5_SET(set_pp_rate_limit_in, in, burst_upper_bound, rl->max_burst_sz); + MLX5_SET(set_pp_rate_limit_in, in, typical_packet_size, rl->typical_pkt_sz); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + + return (rate <= table->max_rate && rate >= table->min_rate); +} +EXPORT_SYMBOL(mlx5_rl_is_in_range); + +bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0, + struct mlx5_rate_limit *rl_1) +{ + return ((rl_0->rate == rl_1->rate) && + (rl_0->max_burst_sz == rl_1->max_burst_sz) && + (rl_0->typical_pkt_sz == rl_1->typical_pkt_sz)); +} +EXPORT_SYMBOL(mlx5_rl_are_equal); + +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, + struct mlx5_rate_limit *rl) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry; + int err = 0; + + mutex_lock(&table->rl_lock); + + if (!rl->rate || !mlx5_rl_is_in_range(dev, rl->rate)) { + mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n", + rl->rate, table->min_rate, table->max_rate); + err = -EINVAL; + goto out; + } + + entry = find_rl_entry(table, rl); + if (!entry) { + mlx5_core_err(dev, "Max number of %u rates reached\n", + table->max_size); + err = -ENOSPC; + goto out; + } + if (entry->refcount) { + /* rate already configured */ + entry->refcount++; + } else { + /* new rate limit */ + err = mlx5_set_pp_rate_limit_cmd(dev, entry->index, rl); + if (err) { + mlx5_core_err(dev, "Failed configuring rate limit(err %d): \ + rate %u, max_burst_sz %u, typical_pkt_sz %u\n", + err, rl->rate, rl->max_burst_sz, + rl->typical_pkt_sz); + goto out; + } + entry->rl = *rl; + entry->refcount = 1; + } + *index = entry->index; + +out: + mutex_unlock(&table->rl_lock); + return err; +} +EXPORT_SYMBOL(mlx5_rl_add_rate); + +void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry = NULL; + struct mlx5_rate_limit reset_rl = {0}; + + /* 0 is a reserved value for unlimited rate */ + if (rl->rate == 0) + return; + + mutex_lock(&table->rl_lock); + entry = find_rl_entry(table, rl); + if (!entry || !entry->refcount) { + mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u \ + are not configured\n", + rl->rate, rl->max_burst_sz, rl->typical_pkt_sz); + goto out; + } + + entry->refcount--; + if (!entry->refcount) { + /* need to remove rate */ + mlx5_set_pp_rate_limit_cmd(dev, entry->index, &reset_rl); + entry->rl = reset_rl; + } + +out: + mutex_unlock(&table->rl_lock); +} +EXPORT_SYMBOL(mlx5_rl_remove_rate); + +int mlx5_init_rl_table(struct mlx5_core_dev *dev) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + int i; + + mutex_init(&table->rl_lock); + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) { + table->max_size = 0; + return 0; + } + + /* First entry is reserved for unlimited rate */ + table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1; + table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate); + table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate); + + table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry), + GFP_KERNEL); + if (!table->rl_entry) + return -ENOMEM; + + /* The index represents the index in HW rate limit table + * Index 0 is reserved for unlimited rate + */ + for (i = 0; i < table->max_size; i++) + table->rl_entry[i].index = i + 1; + + /* Index 0 is reserved */ + mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n", + table->max_size, + table->min_rate >> 10, + table->max_rate >> 10); + + return 0; +} + +void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rate_limit rl = {0}; + int i; + + /* Clear all configured rates */ + for (i = 0; i < table->max_size; i++) + if (table->rl_entry[i].rl.rate) + mlx5_set_pp_rate_limit_cmd(dev, table->rl_entry[i].index, + &rl); + + kfree(dev->priv.rl_table.rl_entry); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c new file mode 100644 index 000000000..a0674962f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/pci.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> +#include "mlx5_core.h" +#include "eswitch.h" + +bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + return !!sriov->num_vfs; +} + +static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct mlx5_hca_vport_context *in; + int err = 0; + + /* Restore sriov guid and policy settings */ + if (sriov->vfs_ctx[vf].node_guid || + sriov->vfs_ctx[vf].port_guid || + sriov->vfs_ctx[vf].policy != MLX5_POLICY_INVALID) { + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->node_guid = sriov->vfs_ctx[vf].node_guid; + in->port_guid = sriov->vfs_ctx[vf].port_guid; + in->policy = sriov->vfs_ctx[vf].policy; + in->field_select = + !!(in->port_guid) * MLX5_HCA_VPORT_SEL_PORT_GUID | + !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID | + !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY; + + err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in); + if (err) + mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf); + + kfree(in); + } + + return err; +} + +static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + int vf; + + if (sriov->enabled_vfs) { + mlx5_core_warn(dev, + "failed to enable SRIOV on device, already enabled with %d vfs\n", + sriov->enabled_vfs); + return -EBUSY; + } + + if (!MLX5_ESWITCH_MANAGER(dev)) + goto enable_vfs_hca; + + err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); + if (err) { + mlx5_core_warn(dev, + "failed to enable eswitch SRIOV (%d)\n", err); + return err; + } + +enable_vfs_hca: + for (vf = 0; vf < num_vfs; vf++) { + err = mlx5_core_enable_hca(dev, vf + 1); + if (err) { + mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err); + continue; + } + sriov->vfs_ctx[vf].enabled = 1; + sriov->enabled_vfs++; + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { + err = sriov_restore_guids(dev, vf); + if (err) { + mlx5_core_warn(dev, + "failed to restore VF %d settings, err %d\n", + vf, err); + continue; + } + } + mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); + } + + return 0; +} + +static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + int vf; + + if (!sriov->enabled_vfs) + goto out; + + for (vf = 0; vf < sriov->num_vfs; vf++) { + if (!sriov->vfs_ctx[vf].enabled) + continue; + err = mlx5_core_disable_hca(dev, vf + 1); + if (err) { + mlx5_core_warn(dev, "failed to disable VF %d\n", vf); + continue; + } + sriov->vfs_ctx[vf].enabled = 0; + sriov->enabled_vfs--; + } + +out: + if (MLX5_ESWITCH_MANAGER(dev)) + mlx5_eswitch_disable_sriov(dev->priv.eswitch); + + if (mlx5_wait_for_vf_pages(dev)) + mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); +} + +static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err = 0; + + if (pci_num_vf(pdev)) { + mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n"); + return -EBUSY; + } + + err = pci_enable_sriov(pdev, num_vfs); + if (err) + mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); + + return err; +} + +static void mlx5_pci_disable_sriov(struct pci_dev *pdev) +{ + pci_disable_sriov(pdev); +} + +static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err = 0; + + err = mlx5_device_enable_sriov(dev, num_vfs); + if (err) { + mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err); + return err; + } + + err = mlx5_pci_enable_sriov(pdev, num_vfs); + if (err) { + mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err); + mlx5_device_disable_sriov(dev); + return err; + } + + sriov->num_vfs = num_vfs; + + return 0; +} + +static void mlx5_sriov_disable(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + mlx5_pci_disable_sriov(pdev); + mlx5_device_disable_sriov(dev); + sriov->num_vfs = 0; +} + +int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err = 0; + + mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs); + if (!mlx5_core_is_pf(dev)) + return -EPERM; + + if (num_vfs) { + int ret; + + ret = mlx5_lag_forbid(dev); + if (ret && (ret != -ENODEV)) + return ret; + } + + if (num_vfs) { + err = mlx5_sriov_enable(pdev, num_vfs); + } else { + mlx5_sriov_disable(pdev); + mlx5_lag_allow(dev); + } + + return err ? err : num_vfs; +} + +int mlx5_sriov_attach(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + if (!mlx5_core_is_pf(dev) || !sriov->num_vfs) + return 0; + + /* If sriov VFs exist in PCI level, enable them in device level */ + return mlx5_device_enable_sriov(dev, sriov->num_vfs); +} + +void mlx5_sriov_detach(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + mlx5_device_disable_sriov(dev); +} + +int mlx5_sriov_init(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct pci_dev *pdev = dev->pdev; + int total_vfs; + + if (!mlx5_core_is_pf(dev)) + return 0; + + total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->num_vfs = pci_num_vf(pdev); + sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + if (!sriov->vfs_ctx) + return -ENOMEM; + + return 0; +} + +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + if (!mlx5_core_is_pf(dev)) + return; + + kfree(sriov->vfs_ctx); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c new file mode 100644 index 000000000..23cc337a9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -0,0 +1,692 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include <linux/mlx5/srq.h> +#include <rdma/ib_verbs.h> +#include "mlx5_core.h" +#include <linux/mlx5/transobj.h> + +void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *srq; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + if (!srq) { + mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn); + return; + } + + srq->event(srq, event_type); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); +} + +static int get_pas_size(struct mlx5_srq_attr *in) +{ + u32 log_page_size = in->log_page_size + 12; + u32 log_srq_size = in->log_size; + u32 log_rq_stride = in->wqe_shift; + u32 page_offset = in->page_offset; + u32 po_quanta = 1 << (log_page_size - 6); + u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); + u32 page_size = 1 << log_page_size; + u32 rq_sz_po = rq_sz + (page_offset * po_quanta); + u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; + + return rq_num_pas * sizeof(u64); +} + +static void set_wq(void *wq, struct mlx5_srq_attr *in) +{ + MLX5_SET(wq, wq, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); + MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); + MLX5_SET(wq, wq, log_wq_sz, in->log_size); + MLX5_SET(wq, wq, page_offset, in->page_offset); + MLX5_SET(wq, wq, lwm, in->lwm); + MLX5_SET(wq, wq, pd, in->pd); + MLX5_SET64(wq, wq, dbr_addr, in->db_record); +} + +static void set_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + MLX5_SET(srqc, srqc, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); + MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); + MLX5_SET(srqc, srqc, log_srq_size, in->log_size); + MLX5_SET(srqc, srqc, page_offset, in->page_offset); + MLX5_SET(srqc, srqc, lwm, in->lwm); + MLX5_SET(srqc, srqc, pd, in->pd); + MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); + MLX5_SET(srqc, srqc, xrcd, in->xrcd); + MLX5_SET(srqc, srqc, cqn, in->cqn); +} + +static void get_wq(void *wq, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(wq, wq, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); + in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; + in->log_size = MLX5_GET(wq, wq, log_wq_sz); + in->page_offset = MLX5_GET(wq, wq, page_offset); + in->lwm = MLX5_GET(wq, wq, lwm); + in->pd = MLX5_GET(wq, wq, pd); + in->db_record = MLX5_GET64(wq, wq, dbr_addr); +} + +static void get_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(srqc, srqc, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); + in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); + in->log_size = MLX5_GET(srqc, srqc, log_srq_size); + in->page_offset = MLX5_GET(srqc, srqc, page_offset); + in->lwm = MLX5_GET(srqc, srqc, lwm); + in->pd = MLX5_GET(srqc, srqc, pd); + in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); +} + +struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *srq; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + return srq; +} +EXPORT_SYMBOL(mlx5_core_get_srq); + +static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; + void *create_in; + void *srqc; + void *pas; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); + pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); + + set_srqc(srqc, in); + memcpy(pas, in->pas, pas_size); + + MLX5_SET(create_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_SRQ); + + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) + srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); + + return err; +} + +static int destroy_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; + + MLX5_SET(destroy_srq_in, srq_in, opcode, + MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); + + return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); +} + +static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + + MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); + MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, srq_in, lwm, lwm); + + return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); +} + +static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; + u32 *srq_out; + void *srqc; + int err; + + srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL); + if (!srq_out) + return -ENOMEM; + + MLX5_SET(query_srq_in, srq_in, opcode, + MLX5_CMD_OP_QUERY_SRQ); + MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); + err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, MLX5_ST_SZ_BYTES(query_srq_out)); + if (err) + goto out; + + srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); + get_srqc(srqc, out); + if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; +out: + kvfree(srq_out); + return err; +} + +static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + void *create_in; + void *xrc_srqc; + void *pas; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, + xrc_srq_context_entry); + pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); + + set_srqc(xrc_srqc, in); + MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); + memcpy(pas, in->pas, pas_size); + MLX5_SET(create_xrc_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_XRC_SRQ); + + memset(create_out, 0, sizeof(create_out)); + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); + if (err) + goto out; + + srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); +out: + kvfree(create_in); + return err; +} + +static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; + + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + + return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, u16 lwm) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; + + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + + return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 *xrcsrq_out; + void *xrc_srqc; + int err; + + xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); + if (!xrcsrq_out) + return -ENOMEM; + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + + MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + + err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (err) + goto out; + + xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, + xrc_srq_context_entry); + get_srqc(xrc_srqc, out); + if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + +out: + kvfree(xrcsrq_out); + return err; +} + +static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + void *create_in; + void *rmpc; + void *wq; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + + err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); + + kvfree(create_in); + return err; +} + +static int destroy_rmp_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + return mlx5_core_destroy_rmp(dev, srq->srqn); +} + +static int arm_rmp_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + void *in; + void *rmpc; + void *wq; + void *bitmask; + int err; + + in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + + err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); + + kvfree(in); + return err; +} + +static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 *rmp_out; + void *rmpc; + int err; + + rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL); + if (!rmp_out) + return -ENOMEM; + + err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out); + if (err) + goto out; + + rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); + get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); + if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) + out->flags |= MLX5_SRQ_FLAG_ERR; + +out: + kvfree(rmp_out); + return err; +} + +static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; + void *create_in; + void *xrqc; + void *wq; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); + wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + + if (in->type == IB_SRQT_TM) { + MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); + if (in->flags & MLX5_SRQ_FLAG_RNDV) + MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); + MLX5_SET(xrqc, xrqc, + tag_matching_topology_context.log_matching_list_sz, + in->tm_log_list_size); + } + MLX5_SET(xrqc, xrqc, user_index, in->user_index); + MLX5_SET(xrqc, xrqc, cqn, in->cqn); + MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) + srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); + + return err; +} + +static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; + + MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_xrq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + + MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); + MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, in, lwm, lwm); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; + u32 *xrq_out; + int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); + void *xrqc; + int err; + + xrq_out = kvzalloc(outlen, GFP_KERNEL); + if (!xrq_out) + return -ENOMEM; + + MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); + MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); + + err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen); + if (err) + goto out; + + xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); + get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); + if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + out->tm_next_tag = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.append_next_index); + out->tm_hw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.hw_phase_cnt); + out->tm_sw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.sw_phase_cnt); + +out: + kvfree(xrq_out); + return err; +} + +static int create_srq_split(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + if (!dev->issi) + return create_srq_cmd(dev, srq, in); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return create_xrc_srq_cmd(dev, srq, in); + case MLX5_RES_XRQ: + return create_xrq_cmd(dev, srq, in); + default: + return create_rmp_cmd(dev, srq, in); + } +} + +static int destroy_srq_split(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + if (!dev->issi) + return destroy_srq_cmd(dev, srq); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return destroy_xrc_srq_cmd(dev, srq); + case MLX5_RES_XRQ: + return destroy_xrq_cmd(dev, srq); + default: + return destroy_rmp_cmd(dev, srq); + } +} + +int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + int err; + struct mlx5_srq_table *table = &dev->priv.srq_table; + + switch (in->type) { + case IB_SRQT_XRC: + srq->common.res = MLX5_RES_XSRQ; + break; + case IB_SRQT_TM: + srq->common.res = MLX5_RES_XRQ; + break; + default: + srq->common.res = MLX5_RES_SRQ; + } + + err = create_srq_split(dev, srq, in); + if (err) + return err; + + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, srq->srqn, srq); + spin_unlock_irq(&table->lock); + if (err) { + mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn); + goto err_destroy_srq_split; + } + + return 0; + +err_destroy_srq_split: + destroy_srq_split(dev, srq); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_srq); + +int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *tmp; + int err; + + spin_lock_irq(&table->lock); + tmp = radix_tree_delete(&table->tree, srq->srqn); + spin_unlock_irq(&table->lock); + if (!tmp) { + mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn); + return -EINVAL; + } + if (tmp != srq) { + mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn); + return -EINVAL; + } + + err = destroy_srq_split(dev, srq); + if (err) + return err; + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + wait_for_completion(&srq->free); + + return 0; +} +EXPORT_SYMBOL(mlx5_core_destroy_srq); + +int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + if (!dev->issi) + return query_srq_cmd(dev, srq, out); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return query_xrc_srq_cmd(dev, srq, out); + case MLX5_RES_XRQ: + return query_xrq_cmd(dev, srq, out); + default: + return query_rmp_cmd(dev, srq, out); + } +} +EXPORT_SYMBOL(mlx5_core_query_srq); + +int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + if (!dev->issi) + return arm_srq_cmd(dev, srq, lwm, is_srq); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return arm_xrc_srq_cmd(dev, srq, lwm); + case MLX5_RES_XRQ: + return arm_xrq_cmd(dev, srq, lwm); + default: + return arm_rmp_cmd(dev, srq, lwm); + } +} +EXPORT_SYMBOL(mlx5_core_arm_srq); + +void mlx5_init_srq_table(struct mlx5_core_dev *dev) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + + memset(table, 0, sizeof(*table)); + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); +} + +void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev) +{ + /* nothing */ +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c new file mode 100644 index 000000000..a1ee9a8a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -0,0 +1,621 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include <linux/mlx5/transobj.h> + +int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn) +{ + u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0}; + int err; + + MLX5_SET(alloc_transport_domain_in, in, opcode, + MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *tdn = MLX5_GET(alloc_transport_domain_out, out, + transport_domain); + + return err; +} +EXPORT_SYMBOL(mlx5_core_alloc_transport_domain); + +void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0}; + + MLX5_SET(dealloc_transport_domain_in, in, opcode, + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain); + +int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) +{ + u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {0}; + int err; + + MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *rqn = MLX5_GET(create_rq_out, out, rqn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_rq); + +int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rq_out)]; + + MLX5_SET(modify_rq_in, in, rqn, rqn); + MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_modify_rq); + +void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {0}; + + MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, in, rqn, rqn); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_destroy_rq); + +int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0}; + int outlen = MLX5_ST_SZ_BYTES(query_rq_out); + + MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ); + MLX5_SET(query_rq_in, in, rqn, rqn); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} +EXPORT_SYMBOL(mlx5_core_query_rq); + +int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn) +{ + u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {0}; + int err; + + MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *sqn = MLX5_GET(create_sq_out, out, sqn); + + return err; +} + +int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_sq_out)] = {0}; + + MLX5_SET(modify_sq_in, in, sqn, sqn); + MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_modify_sq); + +void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {0}; + + MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, in, sqn, sqn); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0}; + int outlen = MLX5_ST_SZ_BYTES(query_sq_out); + + MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ); + MLX5_SET(query_sq_in, in, sqn, sqn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} +EXPORT_SYMBOL(mlx5_core_query_sq); + +int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state) +{ + void *out; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(query_sq_out); + out = kvzalloc(inlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_sq(dev, sqn, out); + if (err) + goto out; + + sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context); + *state = MLX5_GET(sqc, sqc, state); + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state); + +int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *tirn) +{ + u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {0}; + int err; + + MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *tirn = MLX5_GET(create_tir_out, out, tirn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_tir); + +int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_tir_out)] = {0}; + + MLX5_SET(modify_tir_in, in, tirn, tirn); + MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} + +void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {0}; + + MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, in, tirn, tirn); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_destroy_tir); + +int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *tisn) +{ + u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {0}; + int err; + + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *tisn = MLX5_GET(create_tis_out, out, tisn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_tis); + +int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_tis_out)] = {0}; + + MLX5_SET(modify_tis_in, in, tisn, tisn); + MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS); + + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_modify_tis); + +void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0}; + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, tisn, tisn); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_destroy_tis); + +int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn) +{ + u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0}; + int err; + + MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *rmpn = MLX5_GET(create_rmp_out, out, rmpn); + + return err; +} + +int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; + + MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} + +int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0}; + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, rmpn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0}; + int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); + + MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); + MLX5_SET(query_rmp_in, in, rmpn, rmpn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) +{ + void *in; + void *rmpc; + void *wq; + void *bitmask; + int err; + + in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, rmpn); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + + err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); + + kvfree(in); + + return err; +} + +int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *xsrqn) +{ + u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0}; + int err; + + MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn); + + return err; +} + +int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; + + MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm) +{ + u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; + + MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn); + MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, in, op_mod, + MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rqtn) +{ + u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0}; + int err; + + MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *rqtn = MLX5_GET(create_rqt_out, out, rqtn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_rqt); + +int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {0}; + + MLX5_SET(modify_rqt_in, in, rqtn, rqtn); + MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} + +void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0}; + + MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_destroy_rqt); + +static int mlx5_hairpin_create_rq(struct mlx5_core_dev *mdev, + struct mlx5_hairpin_params *params, u32 *rqn) +{ + u32 in[MLX5_ST_SZ_DW(create_rq_in)] = {0}; + void *rqc, *wq; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + + MLX5_SET(rqc, rqc, hairpin, 1); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, counter_set_id, params->q_counter); + + MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size); + MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets); + + return mlx5_core_create_rq(mdev, in, MLX5_ST_SZ_BYTES(create_rq_in), rqn); +} + +static int mlx5_hairpin_create_sq(struct mlx5_core_dev *mdev, + struct mlx5_hairpin_params *params, u32 *sqn) +{ + u32 in[MLX5_ST_SZ_DW(create_sq_in)] = {0}; + void *sqc, *wq; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(sqc, sqc, hairpin, 1); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + + MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size); + MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets); + + return mlx5_core_create_sq(mdev, in, MLX5_ST_SZ_BYTES(create_sq_in), sqn); +} + +static int mlx5_hairpin_create_queues(struct mlx5_hairpin *hp, + struct mlx5_hairpin_params *params) +{ + int i, j, err; + + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_create_rq(hp->func_mdev, params, &hp->rqn[i]); + if (err) + goto out_err_rq; + } + + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_create_sq(hp->peer_mdev, params, &hp->sqn[i]); + if (err) + goto out_err_sq; + } + + return 0; + +out_err_sq: + for (j = 0; j < i; j++) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[j]); + i = hp->num_channels; +out_err_rq: + for (j = 0; j < i; j++) + mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[j]); + return err; +} + +static void mlx5_hairpin_destroy_queues(struct mlx5_hairpin *hp) +{ + int i; + + for (i = 0; i < hp->num_channels; i++) { + mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[i]); + if (!hp->peer_gone) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); + } +} + +static int mlx5_hairpin_modify_rq(struct mlx5_core_dev *func_mdev, u32 rqn, + int curr_state, int next_state, + u16 peer_vhca, u32 peer_sq) +{ + u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {0}; + void *rqc; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + if (next_state == MLX5_RQC_STATE_RDY) { + MLX5_SET(rqc, rqc, hairpin_peer_sq, peer_sq); + MLX5_SET(rqc, rqc, hairpin_peer_vhca, peer_vhca); + } + + MLX5_SET(modify_rq_in, in, rq_state, curr_state); + MLX5_SET(rqc, rqc, state, next_state); + + return mlx5_core_modify_rq(func_mdev, rqn, + in, MLX5_ST_SZ_BYTES(modify_rq_in)); +} + +static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn, + int curr_state, int next_state, + u16 peer_vhca, u32 peer_rq) +{ + u32 in[MLX5_ST_SZ_DW(modify_sq_in)] = {0}; + void *sqc; + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + + if (next_state == MLX5_SQC_STATE_RDY) { + MLX5_SET(sqc, sqc, hairpin_peer_rq, peer_rq); + MLX5_SET(sqc, sqc, hairpin_peer_vhca, peer_vhca); + } + + MLX5_SET(modify_sq_in, in, sq_state, curr_state); + MLX5_SET(sqc, sqc, state, next_state); + + return mlx5_core_modify_sq(peer_mdev, sqn, + in, MLX5_ST_SZ_BYTES(modify_sq_in)); +} + +static int mlx5_hairpin_pair_queues(struct mlx5_hairpin *hp) +{ + int i, j, err; + + /* set peer SQs */ + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], + MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, + MLX5_CAP_GEN(hp->func_mdev, vhca_id), hp->rqn[i]); + if (err) + goto err_modify_sq; + } + + /* set func RQs */ + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], + MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY, + MLX5_CAP_GEN(hp->peer_mdev, vhca_id), hp->sqn[i]); + if (err) + goto err_modify_rq; + } + + return 0; + +err_modify_rq: + for (j = 0; j < i; j++) + mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[j], MLX5_RQC_STATE_RDY, + MLX5_RQC_STATE_RST, 0, 0); + i = hp->num_channels; +err_modify_sq: + for (j = 0; j < i; j++) + mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[j], MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RST, 0, 0); + return err; +} + +static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp) +{ + int i; + + /* unset func RQs */ + for (i = 0; i < hp->num_channels; i++) + mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY, + MLX5_RQC_STATE_RST, 0, 0); + + /* unset peer SQs */ + if (hp->peer_gone) + return; + for (i = 0; i < hp->num_channels; i++) + mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RST, 0, 0); +} + +struct mlx5_hairpin * +mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev, + struct mlx5_core_dev *peer_mdev, + struct mlx5_hairpin_params *params) +{ + struct mlx5_hairpin *hp; + int size, err; + + size = sizeof(*hp) + params->num_channels * 2 * sizeof(u32); + hp = kzalloc(size, GFP_KERNEL); + if (!hp) + return ERR_PTR(-ENOMEM); + + hp->func_mdev = func_mdev; + hp->peer_mdev = peer_mdev; + hp->num_channels = params->num_channels; + + hp->rqn = (void *)hp + sizeof(*hp); + hp->sqn = hp->rqn + params->num_channels; + + /* alloc and pair func --> peer hairpin */ + err = mlx5_hairpin_create_queues(hp, params); + if (err) + goto err_create_queues; + + err = mlx5_hairpin_pair_queues(hp); + if (err) + goto err_pair_queues; + + return hp; + +err_pair_queues: + mlx5_hairpin_destroy_queues(hp); +err_create_queues: + kfree(hp); + return ERR_PTR(err); +} + +void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp) +{ + mlx5_hairpin_unpair_queues(hp); + mlx5_hairpin_destroy_queues(hp); + kfree(hp); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c new file mode 100644 index 000000000..8b97066dd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/io-mapping.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn) +{ + u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {0}; + int err; + + MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *uarn = MLX5_GET(alloc_uar_out, out, uar); + return err; +} +EXPORT_SYMBOL(mlx5_cmd_alloc_uar); + +int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_uar_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {0}; + + MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR); + MLX5_SET(dealloc_uar_in, in, uar, uarn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_cmd_free_uar); + +static int uars_per_sys_page(struct mlx5_core_dev *mdev) +{ + if (MLX5_CAP_GEN(mdev, uar_4k)) + return MLX5_CAP_GEN(mdev, num_of_uars_per_page); + + return 1; +} + +static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index) +{ + u32 system_page_index; + + if (MLX5_CAP_GEN(mdev, uar_4k)) + system_page_index = index >> (PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT); + else + system_page_index = index; + + return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index; +} + +static void up_rel_func(struct kref *kref) +{ + struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count); + + list_del(&up->list); + iounmap(up->map); + if (mlx5_cmd_free_uar(up->mdev, up->index)) + mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index); + kfree(up->reg_bitmap); + kfree(up->fp_bitmap); + kfree(up); +} + +static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev, + bool map_wc) +{ + struct mlx5_uars_page *up; + int err = -ENOMEM; + phys_addr_t pfn; + int bfregs; + int i; + + bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR; + up = kzalloc(sizeof(*up), GFP_KERNEL); + if (!up) + return ERR_PTR(err); + + up->mdev = mdev; + up->reg_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL); + if (!up->reg_bitmap) + goto error1; + + up->fp_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL); + if (!up->fp_bitmap) + goto error1; + + for (i = 0; i < bfregs; i++) + if ((i % MLX5_BFREGS_PER_UAR) < MLX5_NON_FP_BFREGS_PER_UAR) + set_bit(i, up->reg_bitmap); + else + set_bit(i, up->fp_bitmap); + + up->bfregs = bfregs; + up->fp_avail = bfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR; + up->reg_avail = bfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR; + + err = mlx5_cmd_alloc_uar(mdev, &up->index); + if (err) { + mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err); + goto error1; + } + + pfn = uar2pfn(mdev, up->index); + if (map_wc) { + up->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!up->map) { + err = -EAGAIN; + goto error2; + } + } else { + up->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!up->map) { + err = -ENOMEM; + goto error2; + } + } + kref_init(&up->ref_count); + mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n", + up->index, up->bfregs); + return up; + +error2: + if (mlx5_cmd_free_uar(mdev, up->index)) + mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index); +error1: + kfree(up->fp_bitmap); + kfree(up->reg_bitmap); + kfree(up); + return ERR_PTR(err); +} + +struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev) +{ + struct mlx5_uars_page *ret; + + mutex_lock(&mdev->priv.bfregs.reg_head.lock); + if (!list_empty(&mdev->priv.bfregs.reg_head.list)) { + ret = list_first_entry(&mdev->priv.bfregs.reg_head.list, + struct mlx5_uars_page, list); + kref_get(&ret->ref_count); + goto out; + } + ret = alloc_uars_page(mdev, false); + if (IS_ERR(ret)) + goto out; + list_add(&ret->list, &mdev->priv.bfregs.reg_head.list); +out: + mutex_unlock(&mdev->priv.bfregs.reg_head.lock); + + return ret; +} +EXPORT_SYMBOL(mlx5_get_uars_page); + +void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up) +{ + mutex_lock(&mdev->priv.bfregs.reg_head.lock); + kref_put(&up->ref_count, up_rel_func); + mutex_unlock(&mdev->priv.bfregs.reg_head.lock); +} +EXPORT_SYMBOL(mlx5_put_uars_page); + +static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi) +{ + /* return the offset in bytes from the start of the page to the + * blue flame area of the UAR + */ + return dbi / MLX5_BFREGS_PER_UAR * MLX5_ADAPTER_PAGE_SIZE + + (dbi % MLX5_BFREGS_PER_UAR) * + (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) + MLX5_BF_OFFSET; +} + +static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path) +{ + struct mlx5_bfreg_data *bfregs; + struct mlx5_uars_page *up; + struct list_head *head; + unsigned long *bitmap; + unsigned int *avail; + struct mutex *lock; /* pointer to right mutex */ + int dbi; + + bfregs = &mdev->priv.bfregs; + if (map_wc) { + head = &bfregs->wc_head.list; + lock = &bfregs->wc_head.lock; + } else { + head = &bfregs->reg_head.list; + lock = &bfregs->reg_head.lock; + } + mutex_lock(lock); + if (list_empty(head)) { + up = alloc_uars_page(mdev, map_wc); + if (IS_ERR(up)) { + mutex_unlock(lock); + return PTR_ERR(up); + } + list_add(&up->list, head); + } else { + up = list_entry(head->next, struct mlx5_uars_page, list); + kref_get(&up->ref_count); + } + if (fast_path) { + bitmap = up->fp_bitmap; + avail = &up->fp_avail; + } else { + bitmap = up->reg_bitmap; + avail = &up->reg_avail; + } + dbi = find_first_bit(bitmap, up->bfregs); + clear_bit(dbi, bitmap); + (*avail)--; + if (!(*avail)) + list_del(&up->list); + + bfreg->map = up->map + map_offset(mdev, dbi); + bfreg->up = up; + bfreg->wc = map_wc; + bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR; + mutex_unlock(lock); + + return 0; +} + +int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path) +{ + int err; + + err = alloc_bfreg(mdev, bfreg, map_wc, fast_path); + if (!err) + return 0; + + if (err == -EAGAIN && map_wc) + return alloc_bfreg(mdev, bfreg, false, fast_path); + + return err; +} +EXPORT_SYMBOL(mlx5_alloc_bfreg); + +static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev, + struct mlx5_uars_page *up, + struct mlx5_sq_bfreg *bfreg) +{ + unsigned int uar_idx; + unsigned int bfreg_idx; + unsigned int bf_reg_size; + + bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size); + + uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT; + bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size; + + return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx; +} + +void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg) +{ + struct mlx5_bfreg_data *bfregs; + struct mlx5_uars_page *up; + struct mutex *lock; /* pointer to right mutex */ + unsigned int dbi; + bool fp; + unsigned int *avail; + unsigned long *bitmap; + struct list_head *head; + + bfregs = &mdev->priv.bfregs; + if (bfreg->wc) { + head = &bfregs->wc_head.list; + lock = &bfregs->wc_head.lock; + } else { + head = &bfregs->reg_head.list; + lock = &bfregs->reg_head.lock; + } + up = bfreg->up; + dbi = addr_to_dbi_in_syspage(mdev, up, bfreg); + fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR; + if (fp) { + avail = &up->fp_avail; + bitmap = up->fp_bitmap; + } else { + avail = &up->reg_avail; + bitmap = up->reg_bitmap; + } + mutex_lock(lock); + (*avail)++; + set_bit(dbi, bitmap); + if (*avail == 1) + list_add_tail(&up->list, head); + + kref_put(&up->ref_count, up_rel_func); + mutex_unlock(lock); +} +EXPORT_SYMBOL(mlx5_free_bfreg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c new file mode 100644 index 000000000..b02af317c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -0,0 +1,1203 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/export.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> +#include "mlx5_core.h" + +/* Mutex to hold while enabling or disabling RoCE */ +static DEFINE_MUTEX(mlx5_roce_en_lock); + +static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {0}; + + MLX5_SET(query_vport_state_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_STATE); + MLX5_SET(query_vport_state_in, in, op_mod, opmod); + MLX5_SET(query_vport_state_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_state_in, in, other_vport, 1); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); +} + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + + return MLX5_GET(query_vport_state_out, out, state); +} + +int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u8 state) +{ + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0}; + + MLX5_SET(modify_vport_state_in, in, opcode, + MLX5_CMD_OP_MODIFY_VPORT_STATE); + MLX5_SET(modify_vport_state_in, in, op_mod, opmod); + MLX5_SET(modify_vport_state_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_vport_state_in, in, other_vport, 1); + MLX5_SET(modify_vport_state_in, in, admin_state, state); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0}; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); +} + +static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0}; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); +} + +int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 *min_inline) +{ + u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0}; + int err; + + err = mlx5_query_nic_vport_context(mdev, vport, out, sizeof(out)); + if (!err) + *min_inline = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.min_wqe_inline_mode); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline); + +void mlx5_query_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline_mode) +{ + switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_L2: + *min_inline_mode = MLX5_INLINE_MODE_L2; + break; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode); + break; + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + *min_inline_mode = MLX5_INLINE_MODE_NONE; + break; + } +} +EXPORT_SYMBOL_GPL(mlx5_query_min_inline); + +int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 min_inline) +{ + u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0}; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_ctx; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.min_inline, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + MLX5_SET(nic_vport_context, nic_vport_ctx, + min_wqe_inline_mode, min_inline); + + return mlx5_modify_nic_vport_context(mdev, in, inlen); +} + +int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u8 *out_addr; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context.permanent_address); + + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (!err) + ether_addr_copy(addr, &out_addr[2]); + + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); + +int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + void *nic_vport_ctx; + u8 *perm_mac; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.permanent_address, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx, + permanent_address); + + ether_addr_copy(&perm_mac[2], addr); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address); + +int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu) +{ + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out, outlen); + if (!err) + *mtu = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.mtu); + + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mtu); + +int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); + MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, mtu); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu); + +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0}; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(addr_list[i], mac_addr); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list); + +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, list_type); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(curr_mac, addr_list[i]); + } + + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); + +int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, + u32 vport, + u16 vlans[], + int *size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int req_list_size; + int max_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *size; + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, + MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *size = req_list_size; + for (i = 0; i < req_list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans); + +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); + } + + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); + +int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, + u64 *system_image_guid) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + mlx5_query_nic_vport_context(mdev, 0, out, outlen); + + *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out, + nic_vport_context.system_image_guid); + + kvfree(out); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid); + +int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + mlx5_query_nic_vport_context(mdev, 0, out, outlen); + + *node_guid = MLX5_GET64(query_nic_vport_context_out, out, + nic_vport_context.node_guid); + + kvfree(out); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); + +int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, + u32 vport, u64 node_guid) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_context; + void *in; + int err; + + if (!vport) + return -EINVAL; + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EACCES; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.node_guid, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport); + + nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} + +int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, + u16 *qkey_viol_cntr) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + mlx5_query_nic_vport_context(mdev, 0, out, outlen); + + *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.qkey_violation_counter); + + kvfree(out); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr); + +int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 gid_index, + union ib_gid *gid) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_in); + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); + int is_group_manager; + void *out = NULL; + void *in = NULL; + union ib_gid *tmp; + int tbsz; + int nout; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + tbsz = mlx5_get_gid_table_len(MLX5_CAP_GEN(dev, gid_table_size)); + mlx5_core_dbg(dev, "vf_num %d, index %d, gid_table_size %d\n", + vf_num, gid_index, tbsz); + + if (gid_index > tbsz && gid_index != 0xffff) + return -EINVAL; + + if (gid_index == 0xffff) + nout = tbsz; + else + nout = 1; + + out_sz += nout * sizeof(*gid); + + in = kzalloc(in_sz, GFP_KERNEL); + out = kzalloc(out_sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_hca_vport_gid_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_GID); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_gid_in, in, vport_number, vf_num); + MLX5_SET(query_hca_vport_gid_in, in, other_vport, 1); + } else { + err = -EPERM; + goto out; + } + } + MLX5_SET(query_hca_vport_gid_in, in, gid_index, gid_index); + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_gid_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto out; + + tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); + gid->global.subnet_prefix = tmp->global.subnet_prefix; + gid->global.interface_id = tmp->global.interface_id; + +out: + kfree(in); + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid); + +int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 pkey_index, + u16 *pkey) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_in); + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_out); + int is_group_manager; + void *out = NULL; + void *in = NULL; + void *pkarr; + int nout; + int tbsz; + int err; + int i; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + + tbsz = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)); + if (pkey_index > tbsz && pkey_index != 0xffff) + return -EINVAL; + + if (pkey_index == 0xffff) + nout = tbsz; + else + nout = 1; + + out_sz += nout * MLX5_ST_SZ_BYTES(pkey); + + in = kzalloc(in_sz, GFP_KERNEL); + out = kzalloc(out_sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_hca_vport_pkey_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_pkey_in, in, vport_number, vf_num); + MLX5_SET(query_hca_vport_pkey_in, in, other_vport, 1); + } else { + err = -EPERM; + goto out; + } + } + MLX5_SET(query_hca_vport_pkey_in, in, pkey_index, pkey_index); + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_pkey_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto out; + + pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey); + for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey)) + *pkey = MLX5_GET_PR(pkey, pkarr, pkey); + +out: + kfree(in); + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey); + +int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + u16 vf_num, + struct mlx5_hca_vport_context *rep) +{ + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out); + int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {0}; + int is_group_manager; + void *out; + void *ctx; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT); + + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_context_in, in, other_vport, 1); + MLX5_SET(query_hca_vport_context_in, in, vport_number, vf_num); + } else { + err = -EPERM; + goto ex; + } + } + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_context_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + if (err) + goto ex; + + ctx = MLX5_ADDR_OF(query_hca_vport_context_out, out, hca_vport_context); + rep->field_select = MLX5_GET_PR(hca_vport_context, ctx, field_select); + rep->sm_virt_aware = MLX5_GET_PR(hca_vport_context, ctx, sm_virt_aware); + rep->has_smi = MLX5_GET_PR(hca_vport_context, ctx, has_smi); + rep->has_raw = MLX5_GET_PR(hca_vport_context, ctx, has_raw); + rep->policy = MLX5_GET_PR(hca_vport_context, ctx, vport_state_policy); + rep->phys_state = MLX5_GET_PR(hca_vport_context, ctx, + port_physical_state); + rep->vport_state = MLX5_GET_PR(hca_vport_context, ctx, vport_state); + rep->port_physical_state = MLX5_GET_PR(hca_vport_context, ctx, + port_physical_state); + rep->port_guid = MLX5_GET64_PR(hca_vport_context, ctx, port_guid); + rep->node_guid = MLX5_GET64_PR(hca_vport_context, ctx, node_guid); + rep->cap_mask1 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask1); + rep->cap_mask1_perm = MLX5_GET_PR(hca_vport_context, ctx, + cap_mask1_field_select); + rep->cap_mask2 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask2); + rep->cap_mask2_perm = MLX5_GET_PR(hca_vport_context, ctx, + cap_mask2_field_select); + rep->lid = MLX5_GET_PR(hca_vport_context, ctx, lid); + rep->init_type_reply = MLX5_GET_PR(hca_vport_context, ctx, + init_type_reply); + rep->lmc = MLX5_GET_PR(hca_vport_context, ctx, lmc); + rep->subnet_timeout = MLX5_GET_PR(hca_vport_context, ctx, + subnet_timeout); + rep->sm_lid = MLX5_GET_PR(hca_vport_context, ctx, sm_lid); + rep->sm_sl = MLX5_GET_PR(hca_vport_context, ctx, sm_sl); + rep->qkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx, + qkey_violation_counter); + rep->pkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx, + pkey_violation_counter); + rep->grh_required = MLX5_GET_PR(hca_vport_context, ctx, grh_required); + rep->sys_image_guid = MLX5_GET64_PR(hca_vport_context, ctx, + system_image_guid); + +ex: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context); + +int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, + u64 *sys_image_guid) +{ + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep); + if (!err) + *sys_image_guid = rep->sys_image_guid; + + kfree(rep); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid); + +int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, + u64 *node_guid) +{ + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep); + if (!err) + *node_guid = rep->node_guid; + + kfree(rep); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); + +int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, + u32 vport, + int *promisc_uc, + int *promisc_mc, + int *promisc_all) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + *promisc_uc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_uc); + *promisc_mc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_mc); + *promisc_all = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_all); + +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc); + +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_uc, promisc_uc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_mc, promisc_mc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_all, promisc_all); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); + +enum { + UC_LOCAL_LB, + MC_LOCAL_LB +}; + +int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + if (!MLX5_CAP_GEN(mdev, disable_local_lb_mc) && + !MLX5_CAP_GEN(mdev, disable_local_lb_uc)) + return 0; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.disable_mc_local_lb, !enable); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.disable_uc_local_lb, !enable); + + if (MLX5_CAP_GEN(mdev, disable_local_lb_mc)) + MLX5_SET(modify_nic_vport_context_in, in, + field_select.disable_mc_local_lb, 1); + + if (MLX5_CAP_GEN(mdev, disable_local_lb_uc)) + MLX5_SET(modify_nic_vport_context_in, in, + field_select.disable_uc_local_lb, 1); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + if (!err) + mlx5_core_dbg(mdev, "%s local_lb\n", + enable ? "enable" : "disable"); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_update_local_lb); + +int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status) +{ + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 *out; + int value; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out, outlen); + if (err) + goto out; + + value = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.disable_mc_local_lb) << MC_LOCAL_LB; + + value |= MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.disable_uc_local_lb) << UC_LOCAL_LB; + + *status = !value; + +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb); + +enum mlx5_vport_roce_state { + MLX5_VPORT_ROCE_DISABLED = 0, + MLX5_VPORT_ROCE_ENABLED = 1, +}; + +static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev, + enum mlx5_vport_roce_state state) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1); + MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en, + state); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} + +int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev) +{ + int err = 0; + + mutex_lock(&mlx5_roce_en_lock); + if (!mdev->roce.roce_en) + err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED); + + if (!err) + mdev->roce.roce_en++; + mutex_unlock(&mlx5_roce_en_lock); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce); + +int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) +{ + int err = 0; + + mutex_lock(&mlx5_roce_en_lock); + if (mdev->roce.roce_en) { + mdev->roce.roce_en--; + if (mdev->roce.roce_en == 0) + err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); + + if (err) + mdev->roce.roce_en++; + } + mutex_unlock(&mlx5_roce_en_lock); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); + +int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, + int vf, u8 port_num, void *out, + size_t out_sz) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in); + int is_group_manager; + void *in; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + in = kvzalloc(in_sz, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, vport_number, vf + 1); + } else { + err = -EPERM; + goto free; + } + } + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_vport_counter_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); +free: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); + +int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, + u64 *rx_discard_vport_down, + u64 *tx_discard_vport_down) +{ + u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0}; + int err; + + MLX5_SET(query_vnic_env_in, in, opcode, + MLX5_CMD_OP_QUERY_VNIC_ENV); + MLX5_SET(query_vnic_env_in, in, op_mod, 0); + MLX5_SET(query_vnic_env_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vnic_env_in, in, other_vport, 1); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *rx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out, + vport_env.receive_discard_vport_down); + *tx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out, + vport_env.transmit_discard_vport_down); + return 0; +} + +int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + int vf, + struct mlx5_hca_vport_context *req) +{ + int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in); + u8 out[MLX5_ST_SZ_BYTES(modify_hca_vport_context_out)]; + int is_group_manager; + void *in; + int err; + void *ctx; + + mlx5_core_dbg(dev, "vf %d\n", vf); + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + memset(out, 0, sizeof(out)); + MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(modify_hca_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_hca_vport_context_in, in, vport_number, vf); + } else { + err = -EPERM; + goto ex; + } + } + + if (MLX5_CAP_GEN(dev, num_ports) > 1) + MLX5_SET(modify_hca_vport_context_in, in, port_num, port_num); + + ctx = MLX5_ADDR_OF(modify_hca_vport_context_in, in, hca_vport_context); + MLX5_SET(hca_vport_context, ctx, field_select, req->field_select); + MLX5_SET(hca_vport_context, ctx, sm_virt_aware, req->sm_virt_aware); + MLX5_SET(hca_vport_context, ctx, has_smi, req->has_smi); + MLX5_SET(hca_vport_context, ctx, has_raw, req->has_raw); + MLX5_SET(hca_vport_context, ctx, vport_state_policy, req->policy); + MLX5_SET(hca_vport_context, ctx, port_physical_state, req->phys_state); + MLX5_SET(hca_vport_context, ctx, vport_state, req->vport_state); + MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid); + MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid); + MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1); + MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select, req->cap_mask1_perm); + MLX5_SET(hca_vport_context, ctx, cap_mask2, req->cap_mask2); + MLX5_SET(hca_vport_context, ctx, cap_mask2_field_select, req->cap_mask2_perm); + MLX5_SET(hca_vport_context, ctx, lid, req->lid); + MLX5_SET(hca_vport_context, ctx, init_type_reply, req->init_type_reply); + MLX5_SET(hca_vport_context, ctx, lmc, req->lmc); + MLX5_SET(hca_vport_context, ctx, subnet_timeout, req->subnet_timeout); + MLX5_SET(hca_vport_context, ctx, sm_lid, req->sm_lid); + MLX5_SET(hca_vport_context, ctx, sm_sl, req->sm_sl); + MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter); + MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter); + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); +ex: + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context); + +int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, + struct mlx5_core_dev *port_mdev) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + err = mlx5_nic_vport_enable_roce(port_mdev); + if (err) + goto free; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, + MLX5_CAP_GEN(master_mdev, vhca_id)); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliation_criteria, + MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria)); + + err = mlx5_modify_nic_vport_context(port_mdev, in, inlen); + if (err) + mlx5_nic_vport_disable_roce(port_mdev); + +free: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_affiliate_multiport); + +int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, 0); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliation_criteria, 0); + + err = mlx5_modify_nic_vport_context(port_mdev, in, inlen); + if (!err) + mlx5_nic_vport_disable_roce(port_mdev); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c new file mode 100644 index 000000000..ddca327e8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include "wq.h" +#include "mlx5_core.h" + +u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq) +{ + return (u32)wq->fbc.sz_m1 + 1; +} + +u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) +{ + return wq->fbc.sz_m1 + 1; +} + +u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq) +{ + return (u32)wq->fbc.sz_m1 + 1; +} + +static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq) +{ + return mlx5_wq_cyc_get_size(wq) << wq->fbc.log_stride; +} + +static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq) +{ + return mlx5_wq_cyc_get_byte_size(&wq->rq) + + mlx5_wq_cyc_get_byte_size(&wq->sq); +} + +static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq) +{ + return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride; +} + +static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq) +{ + return mlx5_wq_ll_get_size(wq) << wq->fbc.log_stride; +} + +int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_cyc *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; + int err; + + mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride), + MLX5_GET(wq, wqc, log_wq_sz), + fbc); + wq->sz = wq->fbc.sz_m1 + 1; + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); + goto err_db_free; + } + + fbc->frag_buf = wq_ctrl->buf; + wq->db = wq_ctrl->db.db; + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf, + struct mlx5_wq_qp *qp) +{ + struct mlx5_frag_buf_ctrl *sq_fbc; + struct mlx5_frag_buf *rqb, *sqb; + + rqb = &qp->rq.fbc.frag_buf; + *rqb = *buf; + rqb->size = mlx5_wq_cyc_get_byte_size(&qp->rq); + rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE); + + sq_fbc = &qp->sq.fbc; + sqb = &sq_fbc->frag_buf; + *sqb = *buf; + sqb->size = mlx5_wq_cyc_get_byte_size(&qp->sq); + sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE); + sqb->frags += rqb->npages; /* first part is for the rq */ + if (sq_fbc->strides_offset) + sqb->frags--; +} + +int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *qpc, struct mlx5_wq_qp *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + u16 sq_strides_offset; + u32 rq_pg_remainder; + int err; + + mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4, + MLX5_GET(qpc, qpc, log_rq_size), + &wq->rq.fbc); + + rq_pg_remainder = mlx5_wq_cyc_get_byte_size(&wq->rq) % PAGE_SIZE; + sq_strides_offset = rq_pg_remainder / MLX5_SEND_WQE_BB; + + mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB), + MLX5_GET(qpc, qpc, log_sq_size), + sq_strides_offset, + &wq->sq.fbc); + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); + goto err_db_free; + } + + mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq); + + wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR]; + wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR]; + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *cqc, struct mlx5_cqwq *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + int err; + + mlx5_core_init_cq_frag_buf(&wq->fbc, cqc); + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq), + &wq_ctrl->buf, + param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", + err); + goto err_db_free; + } + + wq->fbc.frag_buf = wq_ctrl->buf; + wq->db = wq_ctrl->db.db; + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_ll *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; + struct mlx5_wqe_srq_next_seg *next_seg; + int err; + int i; + + mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride), + MLX5_GET(wq, wqc, log_wq_sz), + fbc); + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); + goto err_db_free; + } + + wq->fbc.frag_buf = wq_ctrl->buf; + wq->db = wq_ctrl->db.db; + + for (i = 0; i < fbc->sz_m1; i++) { + next_seg = mlx5_wq_ll_get_wqe(wq, i); + next_seg->next_wqe_index = cpu_to_be16(i + 1); + } + next_seg = mlx5_wq_ll_get_wqe(wq, i); + wq->tail_next = &next_seg->next_wqe_index; + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl) +{ + mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf); + mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h new file mode 100644 index 000000000..b1293d153 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_WQ_H__ +#define __MLX5_WQ_H__ + +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> + +struct mlx5_wq_param { + int buf_numa_node; + int db_numa_node; +}; + +struct mlx5_wq_ctrl { + struct mlx5_core_dev *mdev; + struct mlx5_frag_buf buf; + struct mlx5_db db; +}; + +struct mlx5_wq_cyc { + struct mlx5_frag_buf_ctrl fbc; + __be32 *db; + u16 sz; + u16 wqe_ctr; + u16 cur_sz; +}; + +struct mlx5_wq_qp { + struct mlx5_wq_cyc rq; + struct mlx5_wq_cyc sq; +}; + +struct mlx5_cqwq { + struct mlx5_frag_buf_ctrl fbc; + __be32 *db; + u32 cc; /* consumer counter */ +}; + +struct mlx5_wq_ll { + struct mlx5_frag_buf_ctrl fbc; + __be32 *db; + __be16 *tail_next; + u16 head; + u16 wqe_ctr; + u16 cur_sz; +}; + +int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_cyc *wq, + struct mlx5_wq_ctrl *wq_ctrl); +u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq); + +int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *qpc, struct mlx5_wq_qp *wq, + struct mlx5_wq_ctrl *wq_ctrl); + +int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *cqc, struct mlx5_cqwq *wq, + struct mlx5_wq_ctrl *wq_ctrl); +u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq); + +int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_ll *wq, + struct mlx5_wq_ctrl *wq_ctrl); +u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq); + +void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl); + +static inline int mlx5_wq_cyc_is_full(struct mlx5_wq_cyc *wq) +{ + return wq->cur_sz == wq->sz; +} + +static inline int mlx5_wq_cyc_missing(struct mlx5_wq_cyc *wq) +{ + return wq->sz - wq->cur_sz; +} + +static inline int mlx5_wq_cyc_is_empty(struct mlx5_wq_cyc *wq) +{ + return !wq->cur_sz; +} + +static inline void mlx5_wq_cyc_push(struct mlx5_wq_cyc *wq) +{ + wq->wqe_ctr++; + wq->cur_sz++; +} + +static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u8 n) +{ + wq->wqe_ctr += n; + wq->cur_sz += n; +} + +static inline void mlx5_wq_cyc_pop(struct mlx5_wq_cyc *wq) +{ + wq->cur_sz--; +} + +static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq) +{ + *wq->db = cpu_to_be32(wq->wqe_ctr); +} + +static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr) +{ + return ctr & wq->fbc.sz_m1; +} + +static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq) +{ + return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr); +} + +static inline u16 mlx5_wq_cyc_get_tail(struct mlx5_wq_cyc *wq) +{ + return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr - wq->cur_sz); +} + +static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix) +{ + return mlx5_frag_buf_get_wqe(&wq->fbc, ix); +} + +static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix) +{ + return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1; +} + +static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2) +{ + int equal = (cc1 == cc2); + int smaller = 0x8000 & (cc1 - cc2); + + return !equal && !smaller; +} + +static inline u32 mlx5_cqwq_ctr2ix(struct mlx5_cqwq *wq, u32 ctr) +{ + return ctr & wq->fbc.sz_m1; +} + +static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq) +{ + return mlx5_cqwq_ctr2ix(wq, wq->cc); +} + +static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) +{ + return mlx5_frag_buf_get_wqe(&wq->fbc, ix); +} + +static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr) +{ + return ctr >> wq->fbc.log_sz; +} + +static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq) +{ + return mlx5_cqwq_get_ctr_wrap_cnt(wq, wq->cc); +} + +static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq) +{ + wq->cc++; +} + +static inline void mlx5_cqwq_update_db_record(struct mlx5_cqwq *wq) +{ + *wq->db = cpu_to_be32(wq->cc & 0xffffff); +} + +static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq) +{ + u32 ci = mlx5_cqwq_get_ci(wq); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); + u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; + u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; + + if (cqe_ownership_bit != sw_ownership_val) + return NULL; + + /* ensure cqe content is read after cqe ownership bit */ + dma_rmb(); + + return cqe; +} + +static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq) +{ + return wq->cur_sz == wq->fbc.sz_m1; +} + +static inline int mlx5_wq_ll_is_empty(struct mlx5_wq_ll *wq) +{ + return !wq->cur_sz; +} + +static inline int mlx5_wq_ll_missing(struct mlx5_wq_ll *wq) +{ + return wq->fbc.sz_m1 - wq->cur_sz; +} + +static inline void *mlx5_wq_ll_get_wqe(struct mlx5_wq_ll *wq, u16 ix) +{ + return mlx5_frag_buf_get_wqe(&wq->fbc, ix); +} + +static inline void mlx5_wq_ll_push(struct mlx5_wq_ll *wq, u16 head_next) +{ + wq->head = head_next; + wq->wqe_ctr++; + wq->cur_sz++; +} + +static inline void mlx5_wq_ll_pop(struct mlx5_wq_ll *wq, __be16 ix, + __be16 *next_tail_next) +{ + *wq->tail_next = ix; + wq->tail_next = next_tail_next; + wq->cur_sz--; +} + +static inline void mlx5_wq_ll_update_db_record(struct mlx5_wq_ll *wq) +{ + *wq->db = cpu_to_be32(wq->wqe_ctr); +} + +#endif /* __MLX5_WQ_H__ */ |